From 04c3881fdc7f3fdf1364ce532d3a90013040df1d Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 16 Sep 2024 20:41:21 +0000 Subject: [PATCH 01/56] Add SubscriberOptions and OpenTelemetry Subscriber option --- google/cloud/pubsub_v1/subscriber/client.py | 13 ++++++++++++- google/cloud/pubsub_v1/types.py | 16 ++++++++++++++++ .../subscriber/test_subscriber_client.py | 19 +++++++++++++++++++ 3 files changed, 47 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/client.py b/google/cloud/pubsub_v1/subscriber/client.py index 0d0d36a0c..205c1ee3f 100644 --- a/google/cloud/pubsub_v1/subscriber/client.py +++ b/google/cloud/pubsub_v1/subscriber/client.py @@ -67,7 +67,16 @@ class Client(subscriber_client.SubscriberClient): ) """ - def __init__(self, **kwargs: Any): + def __init__( + self, + subscriber_options: Union[types.SubscriberOptions, Sequence] = (), + **kwargs: Any + ): + assert ( + isinstance(subscriber_options, types.SubscriberOptions) + or len(subscriber_options) == 0 + ), "subscriber_options must be of type SubscriberOptions or an empty sequence." + # Sanity check: Is our goal to use the emulator? # If so, create a grpc insecure channel with the emulator host # as the target. @@ -82,6 +91,8 @@ def __init__(self, **kwargs: Any): self._target = self._transport._host self._closed = False + self.subscriber_options = types.SubscriberOptions(*subscriber_options) + @classmethod def from_service_account_file( # type: ignore[override] cls, filename: str, **kwargs: Any diff --git a/google/cloud/pubsub_v1/types.py b/google/cloud/pubsub_v1/types.py index c4282e685..342ffeb4f 100644 --- a/google/cloud/pubsub_v1/types.py +++ b/google/cloud/pubsub_v1/types.py @@ -131,6 +131,22 @@ class PublishFlowControl(NamedTuple): """The action to take when publish flow control limits are exceeded.""" +# Define the default subscriber options. +# +# This class is used when creating a subscriber client to pass in options +# to enable/disable features. +class SubscriberOptions(NamedTuple): + """ + Options for the subscriber client. + Attributes: + enable_open_telemetry_tracing (bool): + Whether to enable OpenTelemetry tracing. Defaults to False. + """ + + enable_open_telemetry_tracing: bool = False + """Whether to enable OpenTelemetry tracing.""" + + # Define the default publisher options. # # This class is used when creating a publisher client to pass in options diff --git a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py index 16a6150af..759f8015b 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py @@ -317,3 +317,22 @@ async def test_sync_pull_warning_if_return_immediately_async(creds): warning_msg = str(warned[0].message) assert "return_immediately" in warning_msg assert "deprecated" in warning_msg + + +@pytest.mark.parametrize( + "enable_open_telemetry_tracing", + [ + True, + False, + ], +) +def test_subscriber_settings(creds, enable_open_telemetry_tracing): + options = types.SubscriberOptions( + enable_open_telemetry_tracing=enable_open_telemetry_tracing + ) + client = subscriber.Client(subscriber_options=options, credentials=creds) + + assert ( + client.subscriber_options.enable_open_telemetry_tracing + == enable_open_telemetry_tracing + ) From 0a679a5b48e923723ad842e1e000e29bbe3c2e95 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 16 Sep 2024 21:07:29 +0000 Subject: [PATCH 02/56] Set / Override OpenTelemetry Subscriber settings based on Python Version --- google/cloud/pubsub_v1/subscriber/client.py | 18 +++++++++++++++ .../subscriber/test_subscriber_client.py | 23 +++++++++++-------- 2 files changed, 32 insertions(+), 9 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/client.py b/google/cloud/pubsub_v1/subscriber/client.py index 205c1ee3f..23538a0ac 100644 --- a/google/cloud/pubsub_v1/subscriber/client.py +++ b/google/cloud/pubsub_v1/subscriber/client.py @@ -14,6 +14,7 @@ from __future__ import absolute_import +import sys import os import typing from typing import cast, Any, Callable, Optional, Sequence, Union @@ -93,6 +94,23 @@ def __init__( self.subscriber_options = types.SubscriberOptions(*subscriber_options) + # Set / override Open Telemetry option. + self._open_telemetry_enabled = ( + self.subscriber_options.enable_open_telemetry_tracing + ) + # OpenTelemetry features used by the library are not supported in Python versions <= 3.7. + # Refer https://github.com/open-telemetry/opentelemetry-python/issues/3993#issuecomment-2211976389 + if ( + self.subscriber_options.enable_open_telemetry_tracing + and sys.version_info.major == 3 + and sys.version_info.minor < 8 + ): + warnings.warn( + message="Open Telemetry for Python version 3.7 or lower is not supported. Disabling Open Telemetry tracing.", + category=RuntimeWarning, + ) + self._open_telemetry_enabled = False + @classmethod def from_service_account_file( # type: ignore[override] cls, filename: str, **kwargs: Any diff --git a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py index 759f8015b..3f0d65dce 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py @@ -320,19 +320,24 @@ async def test_sync_pull_warning_if_return_immediately_async(creds): @pytest.mark.parametrize( - "enable_open_telemetry_tracing", + "enable_open_telemetry", [ True, False, ], ) -def test_subscriber_settings(creds, enable_open_telemetry_tracing): +def test_opentelemetry_subscriber_setting(creds, enable_open_telemetry): options = types.SubscriberOptions( - enable_open_telemetry_tracing=enable_open_telemetry_tracing - ) - client = subscriber.Client(subscriber_options=options, credentials=creds) - - assert ( - client.subscriber_options.enable_open_telemetry_tracing - == enable_open_telemetry_tracing + enable_open_telemetry_tracing=enable_open_telemetry, ) + if sys.version_info >= (3, 8) or enable_open_telemetry is False: + client = subscriber.Client(credentials=creds, subscriber_options=options) + assert client.subscriber_options == options + assert client._open_telemetry_enabled == enable_open_telemetry + else: + with pytest.warns( + RuntimeWarning, + match="Open Telemetry for Python version 3.7 or lower is not supported. Disabling Open Telemetry tracing.", + ): + client = subscriber.Client(credentials=creds, subscriber_options=options) + assert client._open_telemetry_enabled is False From 40a2996ad6956008e8e602c3c486846588bd30af Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 16 Sep 2024 21:14:31 +0000 Subject: [PATCH 03/56] Add OpenTelemetryContextGetter used for trace propagation --- .../open_telemetry/context_propagation.py | 18 ++++++++++++- .../subscriber/test_subscriber_client.py | 25 +++++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/context_propagation.py b/google/cloud/pubsub_v1/open_telemetry/context_propagation.py index 37fad3e20..bfa1aa638 100644 --- a/google/cloud/pubsub_v1/open_telemetry/context_propagation.py +++ b/google/cloud/pubsub_v1/open_telemetry/context_propagation.py @@ -12,7 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from opentelemetry.propagators.textmap import Setter +from typing import Optional, List + +from opentelemetry.propagators.textmap import Setter, Getter from google.pubsub_v1 import PubsubMessage @@ -37,3 +39,17 @@ def set(self, carrier: PubsubMessage, key: str, value: str) -> None: None """ carrier.attributes["googclient_" + key] = value + + +class OpenTelemetryContextGetter(Getter): + """ + Used by Open Telemetry for context propagation. + """ + + def get(self, carrier: PubsubMessage, key: str) -> Optional[List[str]]: + if ("googclient_" + key) not in carrier.attributes: + return None + return [carrier.attributes["googclient_" + key]] + + def keys(self, carrier: PubsubMessage) -> List[str]: + return list(map(str, carrier.attributes.keys())) diff --git a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py index 3f0d65dce..7c0ebfd83 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscriber_client.py @@ -30,6 +30,10 @@ from google.cloud.pubsub_v1.subscriber import futures from google.pubsub_v1.services.subscriber import client as subscriber_client from google.pubsub_v1.services.subscriber.transports.grpc import SubscriberGrpcTransport +from google.cloud.pubsub_v1.open_telemetry.context_propagation import ( + OpenTelemetryContextGetter, +) +from google.pubsub_v1.types import PubsubMessage def test_init_default_client_info(creds): @@ -341,3 +345,24 @@ def test_opentelemetry_subscriber_setting(creds, enable_open_telemetry): ): client = subscriber.Client(credentials=creds, subscriber_options=options) assert client._open_telemetry_enabled is False + + +def test_opentelemetry_propagator_get(): + message = PubsubMessage(data=b"foo") + message.attributes["key1"] = "value1" + message.attributes["googclient_key2"] = "value2" + + assert OpenTelemetryContextGetter().get(message, "key2") == ["value2"] + + assert OpenTelemetryContextGetter().get(message, "key1") is None + + +def test_opentelemetry_propagator_keys(): + message = PubsubMessage(data=b"foo") + message.attributes["key1"] = "value1" + message.attributes["googclient_key2"] = "value2" + + assert sorted(OpenTelemetryContextGetter().keys(message)) == [ + "googclient_key2", + "key1", + ] From b10f2f63ca924af0c469d0b5f3954d5d8442404c Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 17:02:58 +0000 Subject: [PATCH 04/56] Add Subscribe Span and SubscribeOpenTelemetry class --- .../open_telemetry/subscribe_opentelemetry.py | 69 +++++++++++++++++++ .../_protocol/streaming_pull_manager.py | 15 ++++ google/cloud/pubsub_v1/subscriber/client.py | 7 ++ .../subscriber/test_streaming_pull_manager.py | 48 +++++++++++-- 4 files changed, 135 insertions(+), 4 deletions(-) create mode 100644 google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py new file mode 100644 index 000000000..f1c7d27ea --- /dev/null +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -0,0 +1,69 @@ +# Copyright 2024, Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional + +from opentelemetry import trace +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator + +from google.cloud.pubsub_v1.open_telemetry.context_propagation import ( + OpenTelemetryContextGetter, +) +from google.pubsub_v1.types import PubsubMessage + + +class SubscribeOpenTelemetry: + _OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" + _OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" + + def __init__(self, message: PubsubMessage): + self._message: PubsubMessage = message + + # subscribe span will be initialized by the `start_subscribe_span` + # method. + self._subscribe_span: Optional[trace.Span] = None + + def start_subscribe_span( + self, + subscription: str, + exactly_once_enabled: bool, + ack_id: str, + delivery_attempt: int, + ) -> None: + tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + parent_span_context = TraceContextTextMapPropagator().extract( + carrier=self._message, + getter=OpenTelemetryContextGetter(), + ) + assert len(subscription.split("/")) == 4 + subscription_short_name = subscription.split("/")[3] + with tracer.start_as_current_span( + name=f"{subscription_short_name} subscribe", + context=parent_span_context if parent_span_context else None, + kind=trace.SpanKind.CONSUMER, + attributes={ + "messaging.system": self._OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.destination.name": subscription_short_name, + "gcp.project_id": subscription.split("/")[1], + "messaging.message.id": self._message.message_id, + "messaging.message.body.size": len(self._message.data), + "messaging.gcp_pubsub.message.ack_id": ack_id, + "messaging.gcp_pubsub.message.ordering_key": self._message.ordering_key, + "messaging.gcp_pubsub.message.exactly_once_delivery": exactly_once_enabled, + "code.function": "_on_response", + "messaging.gcp_pubsub.message.delivery_attempt": delivery_attempt, + }, + end_on_exit=False, + ) as subscribe_span: + self._subscribe_span = subscribe_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index c01dd7f2e..ede0a3540 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -38,6 +38,9 @@ AcknowledgeError, AcknowledgeStatus, ) +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) import google.cloud.pubsub_v1.subscriber.message from google.cloud.pubsub_v1.subscriber import futures from google.cloud.pubsub_v1.subscriber.scheduler import ThreadScheduler @@ -1075,6 +1078,18 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # protobuf message to significantly gain on attribute access performance. received_messages = response._pb.received_messages + subscribe_open_telemetry: List[SubscribeOpenTelemetry] = [] + if self._client.open_telemetry_enabled: + for received_message in received_messages: + opentelemetry_data = SubscribeOpenTelemetry(received_message.message) + opentelemetry_data.start_subscribe_span( + self._subscription, + response.subscription_properties.exactly_once_delivery_enabled, + received_message.ack_id, + received_message.delivery_attempt, + ) + subscribe_open_telemetry.append(opentelemetry_data) + _LOGGER.debug( "Processing %s received message(s), currently on hold %s (bytes %s).", len(received_messages), diff --git a/google/cloud/pubsub_v1/subscriber/client.py b/google/cloud/pubsub_v1/subscriber/client.py index 23538a0ac..175095077 100644 --- a/google/cloud/pubsub_v1/subscriber/client.py +++ b/google/cloud/pubsub_v1/subscriber/client.py @@ -111,6 +111,13 @@ def __init__( ) self._open_telemetry_enabled = False + @property + def open_telemetry_enabled(self) -> bool: + """ + Returns True if Open Telemetry is enabled. False otherwise. + """ + return self._open_telemetry_enabled # pragma: NO COVER + @classmethod def from_service_account_file( # type: ignore[override] cls, filename: str, **kwargs: Any diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index d4ce2cfdb..7774a805f 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -179,11 +179,16 @@ def test_constructor_with_max_duration_per_lease_extension_too_high(): assert manager._stream_ack_deadline == 600 -def make_manager(**kwargs): +def make_manager( + enable_open_telemetry: bool = False, + subscription_name: str = "subscription-name", + **kwargs, +): client_ = mock.create_autospec(client.Client, instance=True) + client_.open_telemetry_enabled = enable_open_telemetry scheduler_ = mock.create_autospec(scheduler.Scheduler, instance=True) return streaming_pull_manager.StreamingPullManager( - client_, "subscription-name", scheduler=scheduler_, **kwargs + client_, subscription_name, scheduler=scheduler_, **kwargs ) @@ -1224,8 +1229,12 @@ def test_open_has_been_closed(): manager.open(mock.sentinel.callback, mock.sentinel.on_callback_error) -def make_running_manager(**kwargs): - manager = make_manager(**kwargs) +def make_running_manager( + enable_open_telemetry: bool = False, + subscription_name: str = "subscription-name", + **kwargs, +): + manager = make_manager(enable_open_telemetry, subscription_name, **kwargs) manager._consumer = mock.create_autospec(bidi.BackgroundConsumer, instance=True) manager._consumer.is_active = True manager._dispatcher = mock.create_autospec(dispatcher.Dispatcher, instance=True) @@ -2626,3 +2635,34 @@ def test_process_requests_mixed_success_and_failure_modacks(): # message with ack_id 'ackid3' succeeds assert requests_completed[1].ack_id == "ackid3" assert future3.result() == subscriber_exceptions.AcknowledgeStatus.SUCCESS + + +def test_opentelemetry__on_response_subscribe_span_create(span_exporter): + manager, _, _, leaser, _, _ = make_running_manager( + enable_open_telemetry=True, + subscription_name="projects/projectID/subscriptions/subscriptionID", + ) + + fake_leaser_add(leaser, init_msg_count=0, assumed_msg_size=42) + manager._callback = mock.sentinel.callback + + response = gapic_types.StreamingPullResponse( + received_messages=[ + gapic_types.ReceivedMessage( + ack_id="ack1", + message=gapic_types.PubsubMessage(data=b"foo", message_id="1"), + ), + gapic_types.ReceivedMessage( + ack_id="ack2", + message=gapic_types.PubsubMessage(data=b"bar", message_id="2"), + delivery_attempt=6, + ), + ] + ) + + manager._on_response(response) + + spans = span_exporter.get_finished_spans() + + # Subscribe span is still active, hence unexported. + assert len(spans) == 0 From 95a1d32d7267e6d610d26e5dbc13a8bce0a8f6ef Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 17:08:18 +0000 Subject: [PATCH 05/56] Add receipt modack start and end events to subscribe span --- .../open_telemetry/subscribe_opentelemetry.py | 10 ++++++++++ .../subscriber/_protocol/streaming_pull_manager.py | 10 ++++++++-- 2 files changed, 18 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index f1c7d27ea..6347b3963 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -13,6 +13,7 @@ # limitations under the License. from typing import Optional +from datetime import datetime from opentelemetry import trace from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator @@ -67,3 +68,12 @@ def start_subscribe_span( end_on_exit=False, ) as subscribe_span: self._subscribe_span = subscribe_span + + def add_subscribe_span_event(self, event: str) -> None: + assert self._subscribe_span is not None + self._subscribe_span.add_event( + name=event, + attributes={ + "timestamp": str(datetime.now()), + }, + ) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index ede0a3540..50edd9c66 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1078,7 +1078,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # protobuf message to significantly gain on attribute access performance. received_messages = response._pb.received_messages - subscribe_open_telemetry: List[SubscribeOpenTelemetry] = [] + subscribe_opentelemetry: List[SubscribeOpenTelemetry] = [] if self._client.open_telemetry_enabled: for received_message in received_messages: opentelemetry_data = SubscribeOpenTelemetry(received_message.message) @@ -1088,7 +1088,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: received_message.ack_id, received_message.delivery_attempt, ) - subscribe_open_telemetry.append(opentelemetry_data) + subscribe_opentelemetry.append(opentelemetry_data) _LOGGER.debug( "Processing %s received message(s), currently on hold %s (bytes %s).", @@ -1113,10 +1113,16 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. + if self._client.open_telemetry_enabled: + for opentelemetry_data in subscribe_opentelemetry: + opentelemetry_data.add_subscribe_span_event("modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( ack_id_gen, self.ack_deadline, warn_on_invalid=False ) + if self._client.open_telemetry_enabled: + for opentelemetry_data in subscribe_opentelemetry: + opentelemetry_data.add_subscribe_span_event("modack end") with self._pause_resume_lock: if self._scheduler is None or self._leaser is None: From c556f99c8e76f487a0735c86b722b797e21b17a2 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 18:21:24 +0000 Subject: [PATCH 06/56] Modify SubscriberMessage to include opentelemetry data and include SubscribeSpan information in the SubscriberMessage's Open Telemetry data --- .../_protocol/streaming_pull_manager.py | 4 ++++ google/cloud/pubsub_v1/subscriber/message.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 50edd9c66..1184f5a51 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1131,6 +1131,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: ) return + i: int = 0 for received_message in received_messages: if ( not self._exactly_once_delivery_enabled() @@ -1143,6 +1144,9 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: self._scheduler.queue, self._exactly_once_delivery_enabled, ) + if self._client.open_telemetry_enabled: + message.opentelemetry_data = subscribe_opentelemetry[i] + i = i + 1 self._messages_on_hold.put(message) self._on_hold_bytes += message.size req = requests.LeaseRequest( diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index f744966a2..a5628de90 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -24,6 +24,9 @@ from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber import futures from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) if typing.TYPE_CHECKING: # pragma: NO COVER @@ -85,6 +88,8 @@ class Message(object): information on this type. publish_time (google.protobuf.timestamp_pb2.Timestamp): The time that this message was originally published. + opentelemetry_data (google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry.SubscribeOpenTelemetry) + Open Telemetry data associated with this message. None if Open Telemetry is not enabled. """ def __init__( @@ -144,6 +149,9 @@ def __init__( self._ordering_key = message.ordering_key self._size = message.ByteSize() + # None if Open Telemetry is disabled. Else contains OpenTelemetry data. + self._opentelemetry_data: SubscribeOpenTelemetry = None + def __repr__(self): # Get an abbreviated version of the data. abbv_data = self._message.data @@ -158,6 +166,14 @@ def __repr__(self): pretty_attrs = pretty_attrs.lstrip() return _MESSAGE_REPR.format(abbv_data, str(self.ordering_key), pretty_attrs) + @property + def opentelemetry_data(self): + return self._opentelemetry_data # pragma: NO COVER + + @opentelemetry_data.setter + def opentelemetry_data(self, data): + self._opentelemetry_data = data # pragma: NO COVER + @property def attributes(self) -> "containers.ScalarMap": """Return the attributes of the underlying Pub/Sub Message. From f52fbac9f0f7aa76844bd2d9df3c2166728fa77a Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 18:47:19 +0000 Subject: [PATCH 07/56] Add ack, modack, nack start events to subscribe span --- google/cloud/pubsub_v1/subscriber/message.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index a5628de90..990549013 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -268,6 +268,8 @@ def ack(self) -> None: https://cloud.google.com/pubsub/docs/exactly-once-delivery." """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("ack start") time_to_ack = math.ceil(time.time() - self._received_timestamp) self._request_queue.put( requests.AckRequest( @@ -318,6 +320,8 @@ def ack_with_response(self) -> "futures.Future": pubsub_v1.subscriber.exceptions.AcknowledgeError exception will be thrown. """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("ack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() @@ -372,6 +376,8 @@ def modify_ack_deadline(self, seconds: int) -> None: between 0 and 600. Due to network latency, values below 10 are advised against. """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("modack start") self._request_queue.put( requests.ModAckRequest(ack_id=self._ack_id, seconds=seconds, future=None) ) @@ -422,6 +428,8 @@ def modify_ack_deadline_with_response(self, seconds: int) -> "futures.Future": will be thrown. """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("modack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() @@ -445,6 +453,8 @@ def nack(self) -> None: may take place immediately or after a delay, and may arrive at this subscriber or another. """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("nack start") self._request_queue.put( requests.NackRequest( ack_id=self._ack_id, @@ -488,6 +498,8 @@ def nack_with_response(self) -> "futures.Future": will be thrown. """ + if self.opentelemetry_data: + self.opentelemetry_data.add_subscribe_span_event("nack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() From b5ab6bb96e87cfbc731f8cff10a97cf777189ed7 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 18:51:38 +0000 Subject: [PATCH 08/56] Add Open Telemetry data to Ack, Modack, Nack request objects * This is required for the Open Telemetry information to be percolated to the dispatcher which performs acks, nacks and modacks async. * The Open Telemetry data passed into the dispatcher will be used to add events such as ack, modack, nack end to the subscribe span --- google/cloud/pubsub_v1/subscriber/_protocol/requests.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index 9cd387545..e1ec8b6a9 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -15,6 +15,10 @@ import typing from typing import NamedTuple, Optional +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) + if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1.subscriber import futures @@ -27,6 +31,7 @@ class AckRequest(NamedTuple): time_to_ack: float ordering_key: Optional[str] future: Optional["futures.Future"] + opentelemetry_data: Optional[SubscribeOpenTelemetry] = None class DropRequest(NamedTuple): @@ -45,6 +50,7 @@ class ModAckRequest(NamedTuple): ack_id: str seconds: float future: Optional["futures.Future"] + opentelemetry_data: Optional[SubscribeOpenTelemetry] = None class NackRequest(NamedTuple): @@ -52,3 +58,4 @@ class NackRequest(NamedTuple): byte_size: int ordering_key: Optional[str] future: Optional["futures.Future"] + opentelemetry_data: Optional[SubscribeOpenTelemetry] = None From a695c99f1c08ea24529a616dee186b0671a6cc7d Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 19:15:17 +0000 Subject: [PATCH 09/56] Add ack result, ack end event to subscribe span and end it --- .../open_telemetry/subscribe_opentelemetry.py | 11 +++++++++++ .../pubsub_v1/subscriber/_protocol/dispatcher.py | 13 +++++++++++++ 2 files changed, 24 insertions(+) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 6347b3963..f30cda5de 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -77,3 +77,14 @@ def add_subscribe_span_event(self, event: str) -> None: "timestamp": str(datetime.now()), }, ) + + def end_subscribe_span(self) -> None: + assert self._subscribe_span is not None + self._subscribe_span.end() + + def set_subscribe_span_result(self, result: str) -> None: + assert self._subscribe_span is not None + self._subscribe_span.set_attribute( + key="messaging.gcp_pubsub.result", + value=result, + ) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 15ad4abb3..fc5193555 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -243,6 +243,12 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: ack_reqs_dict=ack_reqs_dict, ) + for completed_ack in requests_completed: + if completed_ack.opentelemetry_data: + completed_ack.opentelemetry_data.set_subscribe_span_result("acked") + completed_ack.opentelemetry_data.add_subscribe_span_event("ack end") + completed_ack.opentelemetry_data.end_subscribe_span() + # Remove the completed messages from lease management. self.drop(requests_completed) @@ -286,6 +292,13 @@ def _retry_acks(self, requests_to_retry): ack_ids=[req.ack_id for req in requests_to_retry], ack_reqs_dict=ack_reqs_dict, ) + + for completed_ack in requests_completed: + if completed_ack.opentelemetry_data: + completed_ack.opentelemetry_data.set_subscribe_span_result("acked") + completed_ack.opentelemetry_data.add_subscribe_span_event("ack end") + completed_ack.opentelemetry_data.end_subscribe_span() + assert ( len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE ), "Too many requests to be retried." From 694cfde9ac380a57ec4d55c9bf8fba70ca747017 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 20:00:58 +0000 Subject: [PATCH 10/56] Add tests for ack and ack_with_response methods of Subscriber Message * Both these methods should add the "ack start" event to the subscribe span --- .../unit/pubsub_v1/subscriber/test_message.py | 46 +++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 49b07b7fd..38676c7b0 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -16,6 +16,7 @@ import queue import sys import time +import pytest # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -29,6 +30,9 @@ from google.protobuf import timestamp_pb2 from google.pubsub_v1 import types as gapic_types from google.cloud.pubsub_v1.subscriber.exceptions import AcknowledgeStatus +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) @@ -131,6 +135,48 @@ def check_call_types(mock, *args, **kwargs): assert isinstance(call_args[n], argtype) +def test_opentelemetry_ack(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.ack() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "ack start" + + +def test_opentelemetry_ack_with_response(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.ack_with_response() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "ack start" + + def test_ack(): msg = create_message(b"foo", ack_id="bogus_ack_id") with mock.patch.object(msg._request_queue, "put") as put: From 27edc0ff2135e0cc20ffa0fc260aced278b78297 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 20:03:35 +0000 Subject: [PATCH 11/56] Add tests for nack and nack_with_response methods of Subscriber Message * Both these methods should add "nack start" events to the subscribe span --- .../unit/pubsub_v1/subscriber/test_message.py | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 38676c7b0..37e7adcf8 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -177,6 +177,48 @@ def test_opentelemetry_ack_with_response(span_exporter): assert spans[0].events[0].name == "ack start" +def test_opentelemetry_nack(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.nack() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "nack start" + + +def test_opentelemetry_nack_with_response(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.nack_with_response() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "nack start" + + def test_ack(): msg = create_message(b"foo", ack_id="bogus_ack_id") with mock.patch.object(msg._request_queue, "put") as put: From c1f0ad8468eabdf7a11ecd00fba42777b185d9ef Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 20:07:24 +0000 Subject: [PATCH 12/56] Add tests for modify_ack_deadline and modify_ack_deadline_with_response * Both these methods should add the "modack start" event to the subscribe span --- .../unit/pubsub_v1/subscriber/test_message.py | 43 ++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 37e7adcf8..236453bac 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -16,7 +16,6 @@ import queue import sys import time -import pytest # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -219,6 +218,48 @@ def test_opentelemetry_nack_with_response(span_exporter): assert spans[0].events[0].name == "nack start" +def test_opentelemetry_modack(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.modify_ack_deadline(3) + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "modack start" + + +def test_opentelemetry_modack_with_response(span_exporter): + SUBSCRIPTION = "projects/projectID/subscriptions/subscriptionID" + msg = create_message(b"data", ack_id="ack_id") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription=SUBSCRIPTION, + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=2, + ) + msg.opentelemetry_data = opentelemetry_data + msg.modify_ack_deadline_with_response(3) + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + assert len(spans[0].events) == 1 + assert spans[0].events[0].name == "modack start" + + def test_ack(): msg = create_message(b"foo", ack_id="bogus_ack_id") with mock.patch.object(msg._request_queue, "put") as put: From e97b83cd5a09735588aee03c526707dab6a6c6f5 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 21:05:39 +0000 Subject: [PATCH 13/56] Add tests for set_subscribe_span_result method of Subscribe OpenTelemetry --- .../test_subscribe_opentelemetry.py | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py new file mode 100644 index 000000000..0271cb456 --- /dev/null +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -0,0 +1,98 @@ +# Copyright 2024, Google LLC All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import datetime +import time +import sys +import queue +import pytest + +from google.protobuf import timestamp_pb2 +from google.api_core import datetime_helpers + +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) +from google.cloud.pubsub_v1.subscriber.message import Message +from google.cloud.pubsub_v1.types import PubsubMessage + +# special case python < 3.8 +if sys.version_info.major == 3 and sys.version_info.minor < 8: + import mock +else: + from unittest import mock + +RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) +RECEIVED_SECONDS = datetime_helpers.to_milliseconds(RECEIVED) // 1000 +PUBLISHED_MICROS = 123456 +PUBLISHED = RECEIVED + datetime.timedelta(days=1, microseconds=PUBLISHED_MICROS) +PUBLISHED_SECONDS = datetime_helpers.to_milliseconds(PUBLISHED) // 1000 + + +def create_message( + data, + ack_id="ACKID", + delivery_attempt=0, + ordering_key="", + exactly_once_delivery_enabled=False, + **attrs +): # pragma: NO COVER + with mock.patch.object(time, "time") as time_: + time_.return_value = RECEIVED_SECONDS + gapic_pubsub_message = PubsubMessage( + attributes=attrs, + data=data, + message_id="message_id", + publish_time=timestamp_pb2.Timestamp( + seconds=PUBLISHED_SECONDS, nanos=PUBLISHED_MICROS * 1000 + ), + ordering_key=ordering_key, + ) + msg = Message( + # The code under test uses a raw protobuf PubsubMessage, i.e. w/o additional + # Python class wrappers, hence the "_pb" + message=gapic_pubsub_message._pb, + ack_id=ack_id, + delivery_attempt=delivery_attempt, + request_queue=queue.Queue(), + exactly_once_delivery_enabled_func=lambda: exactly_once_delivery_enabled, + ) + return msg + + +def test_opentelemetry_set_subscribe_span_result(span_exporter): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + msg.opentelemetry_data = opentelemetry_data + opentelemetry_data.set_subscribe_span_result("acked") + opentelemetry_data.end_subscribe_span() + spans = span_exporter.get_finished_spans() + + assert len(spans) == 1 + + assert "messaging.gcp_pubsub.result" in spans[0].attributes + assert spans[0].attributes["messaging.gcp_pubsub.result"] == "acked" + + +def test_opentelemetry_set_subscribe_span_result_assert_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.set_subscribe_span_result("hi") From 8189b1228901ef331a96604f0b70f2d0967b5229 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 21:45:38 +0000 Subject: [PATCH 14/56] Add test for dispatcher ack method --- .../pubsub_v1/subscriber/test_dispatcher.py | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 89d72c61d..1db6a08c1 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -22,6 +22,10 @@ from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber._protocol import streaming_pull_manager from google.cloud.pubsub_v1.subscriber import futures +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) +from google.pubsub_v1.types import PubsubMessage # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -365,6 +369,43 @@ def test_unknown_request_type(): dispatcher_.dispatch_callback(items) +def test_opentelemetry_ack(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + items = [ + requests.AckRequest( + ack_id="ack_id_string", + byte_size=0, + time_to_ack=20, + ordering_key="", + future=None, + opentelemetry_data=opentelemetry_data, + ) + ] + manager.send_unary_ack.return_value = (items, []) + dispatcher_.ack(items) + + spans = span_exporter.get_finished_spans() + + assert len(spans) == 1 + subscribe_span = spans[0] + + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "ack end" + + def test_ack(): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True From 48ffe54f6425dda6fdcb7f09e63ec9f146c7a4fb Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 21:51:09 +0000 Subject: [PATCH 15/56] Add test for retry_acks method in dispatcher --- .../pubsub_v1/subscriber/test_dispatcher.py | 40 +++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 1db6a08c1..dce03769e 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -522,6 +522,46 @@ def test_retry_acks_in_new_thread(): assert ctor_call.kwargs["daemon"] +def test_opentelemetry_retry_acks(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + + f = futures.Future() + items = [ + requests.AckRequest( + ack_id="ack_id_string", + byte_size=0, + time_to_ack=20, + ordering_key="", + future=f, + opentelemetry_data=opentelemetry_data, + ) + ] + # first and second `send_unary_ack` calls fail, third one succeeds + manager.send_unary_ack.side_effect = [([], items), ([], items), (items, [])] + with mock.patch("time.sleep", return_value=None): + dispatcher_._retry_acks(items) + + spans = span_exporter.get_finished_spans() + + assert len(spans) == 1 + subscribe_span = spans[0] + + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "ack end" + + def test_retry_acks(): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True From d40b6767669c479598fb3a4fd0e9a95c0d026942 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 22:19:27 +0000 Subject: [PATCH 16/56] Pass the OpenTelemetry data to the dispatcher when message is acked, nacked or modacked --- google/cloud/pubsub_v1/subscriber/message.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 990549013..371e87497 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -278,6 +278,7 @@ def ack(self) -> None: time_to_ack=time_to_ack, ordering_key=self.ordering_key, future=None, + opentelemetry_data=self.opentelemetry_data, ) ) @@ -337,6 +338,7 @@ def ack_with_response(self) -> "futures.Future": time_to_ack=time_to_ack, ordering_key=self.ordering_key, future=req_future, + opentelemetry_data=self.opentelemetry_data, ) ) return future @@ -379,7 +381,12 @@ def modify_ack_deadline(self, seconds: int) -> None: if self.opentelemetry_data: self.opentelemetry_data.add_subscribe_span_event("modack start") self._request_queue.put( - requests.ModAckRequest(ack_id=self._ack_id, seconds=seconds, future=None) + requests.ModAckRequest( + ack_id=self._ack_id, + seconds=seconds, + future=None, + opentelemetry_data=self.opentelemetry_data, + ) ) def modify_ack_deadline_with_response(self, seconds: int) -> "futures.Future": @@ -440,7 +447,10 @@ def modify_ack_deadline_with_response(self, seconds: int) -> "futures.Future": self._request_queue.put( requests.ModAckRequest( - ack_id=self._ack_id, seconds=seconds, future=req_future + ack_id=self._ack_id, + seconds=seconds, + future=req_future, + opentelemetry_data=self.opentelemetry_data, ) ) @@ -461,6 +471,7 @@ def nack(self) -> None: byte_size=self.size, ordering_key=self.ordering_key, future=None, + opentelemetry_data=self.opentelemetry_data, ) ) @@ -514,6 +525,7 @@ def nack_with_response(self) -> "futures.Future": byte_size=self.size, ordering_key=self.ordering_key, future=req_future, + opentelemetry_data=self.opentelemetry_data, ) ) From 383ac2cb87652300c9642e155de35e6263d0a326 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 23:04:49 +0000 Subject: [PATCH 17/56] Record nack and modack end events, set their results and end them in dispatcher --- .../subscriber/_protocol/dispatcher.py | 29 ++++++- .../pubsub_v1/subscriber/test_dispatcher.py | 78 +++++++++++++++++++ 2 files changed, 104 insertions(+), 3 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index fc5193555..57ee8acb5 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -355,9 +355,10 @@ def modify_ack_deadline( for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } requests_to_retry: List[requests.ModAckRequest] + requests_completed: List[requests.ModAckRequest] = None if default_deadline is None: # no further work needs to be done for `requests_to_retry` - _, requests_to_retry = self._manager.send_unary_modack( + requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=list( itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE) ), @@ -368,7 +369,7 @@ def modify_ack_deadline( default_deadline=None, ) else: - _, requests_to_retry = self._manager.send_unary_modack( + requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=itertools.islice( ack_ids_gen, _ACK_IDS_BATCH_SIZE ), @@ -380,6 +381,25 @@ def modify_ack_deadline( len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE ), "Too many requests to be retried." + for completed_modack in requests_completed: + if completed_modack.opentelemetry_data: + # nack is a modack with 0 extension seconds. + if math.isclose(completed_modack.seconds, 0): + completed_modack.opentelemetry_data.set_subscribe_span_result( + "nacked" + ) + completed_modack.opentelemetry_data.add_subscribe_span_event( + "nack end" + ) + else: + completed_modack.opentelemetry_data.set_subscribe_span_result( + "modacked" + ) + completed_modack.opentelemetry_data.add_subscribe_span_event( + "modack end" + ) + completed_modack.opentelemetry_data.end_subscribe_span() + # Retry on a separate thread so the dispatcher thread isn't blocked # by sleeps. if requests_to_retry: @@ -418,7 +438,10 @@ def nack(self, items: Sequence[requests.NackRequest]) -> None: self.modify_ack_deadline( [ requests.ModAckRequest( - ack_id=item.ack_id, seconds=0, future=item.future + ack_id=item.ack_id, + seconds=0, + future=item.future, + opentelemetry_data=item.opentelemetry_data, ) for item in items ] diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index dce03769e..93e6715c9 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -369,6 +369,40 @@ def test_unknown_request_type(): dispatcher_.dispatch_callback(items) +def test_opentelemetry_modify_ack_deadline(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + + items = [ + requests.ModAckRequest( + ack_id="ack_id_string", + seconds=60, + future=None, + opentelemetry_data=opentelemetry_data, + ) + ] + manager.send_unary_modack.return_value = (items, []) + dispatcher_.modify_ack_deadline(items) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + subscribe_span = spans[0] + + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "modacked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "modack end" + + def test_opentelemetry_ack(span_exporter): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True @@ -714,6 +748,50 @@ def test_drop_ordered_messages(): manager.maybe_resume_consumer.assert_called_once() +def test_opentelemetry_nack(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + + items = [ + requests.NackRequest( + ack_id="ack_id_string", + byte_size=10, + ordering_key="", + future=None, + opentelemetry_data=opentelemetry_data, + ) + ] + response_items = [ + requests.ModAckRequest( + ack_id="ack_id_string", + seconds=0, + future=None, + opentelemetry_data=opentelemetry_data, + ) + ] + manager.send_unary_modack.return_value = (response_items, []) + dispatcher_.nack(items) + + spans = span_exporter.get_finished_spans() + + assert len(spans) == 1 + subscribe_span = spans[0] + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "nack end" + + def test_nack(): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True From 5ce0cf8cba99105587c5d0d7c9105c84154c423b Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Wed, 18 Sep 2024 23:28:38 +0000 Subject: [PATCH 18/56] Record modack, nack ends after successful retries --- .../subscriber/_protocol/dispatcher.py | 18 +++++ .../pubsub_v1/subscriber/test_dispatcher.py | 77 ++++++++++++++++++- 2 files changed, 93 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 57ee8acb5..b9dbde2c8 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -428,6 +428,24 @@ def _retry_modacks(self, requests_to_retry): modify_deadline_seconds=[req.seconds for req in requests_to_retry], ack_reqs_dict=ack_reqs_dict, ) + for completed_modack in requests_completed: + if completed_modack.opentelemetry_data: + # nack is a modack with 0 extension seconds. + if math.isclose(completed_modack.seconds, 0): + completed_modack.opentelemetry_data.set_subscribe_span_result( + "nacked" + ) + completed_modack.opentelemetry_data.add_subscribe_span_event( + "nack end" + ) + else: + completed_modack.opentelemetry_data.set_subscribe_span_result( + "modacked" + ) + completed_modack.opentelemetry_data.add_subscribe_span_event( + "modack end" + ) + completed_modack.opentelemetry_data.end_subscribe_span() def nack(self, items: Sequence[requests.NackRequest]) -> None: """Explicitly deny receipt of messages. diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 93e6715c9..5626fa8c2 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -580,8 +580,7 @@ def test_opentelemetry_retry_acks(span_exporter): opentelemetry_data=opentelemetry_data, ) ] - # first and second `send_unary_ack` calls fail, third one succeeds - manager.send_unary_ack.side_effect = [([], items), ([], items), (items, [])] + manager.send_unary_ack.side_effect = [(items, [])] with mock.patch("time.sleep", return_value=None): dispatcher_._retry_acks(items) @@ -659,6 +658,80 @@ def test_retry_modacks_in_new_thread(): assert ctor_call.kwargs["daemon"] +def test_opentelemetry_retry_modacks(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + + f = futures.Future() + items = [ + requests.ModAckRequest( + ack_id="ack_id_string", + seconds=20, + future=f, + opentelemetry_data=opentelemetry_data, + ) + ] + manager.send_unary_modack.side_effect = [(items, [])] + with mock.patch("time.sleep", return_value=None): + dispatcher_._retry_modacks(items) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + subscribe_span = spans[0] + + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "modacked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "modack end" + + +def test_opentelemetry_retry_nacks(span_exporter): + manager = mock.create_autospec( + streaming_pull_manager.StreamingPullManager, instance=True + ) + dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) + + opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + + f = futures.Future() + items = [ + requests.ModAckRequest( + ack_id="ack_id_string", + seconds=0, + future=f, + opentelemetry_data=opentelemetry_data, + ) + ] + manager.send_unary_modack.side_effect = [(items, [])] + with mock.patch("time.sleep", return_value=None): + dispatcher_._retry_modacks(items) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + subscribe_span = spans[0] + + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "nack end" + + def test_retry_modacks(): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True From 5b21c930eccb2331ec50e65c3e957c015fbde8fd Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Thu, 19 Sep 2024 15:12:10 +0000 Subject: [PATCH 19/56] Fix mypy errors --- google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py | 2 +- google/cloud/pubsub_v1/subscriber/message.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index b9dbde2c8..04bfda5cf 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -355,7 +355,7 @@ def modify_ack_deadline( for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } requests_to_retry: List[requests.ModAckRequest] - requests_completed: List[requests.ModAckRequest] = None + requests_completed: Optional[List[requests.ModAckRequest]] = None if default_deadline is None: # no further work needs to be done for `requests_to_retry` requests_completed, requests_to_retry = self._manager.send_unary_modack( diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 371e87497..c9b2effd7 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -150,7 +150,7 @@ def __init__( self._size = message.ByteSize() # None if Open Telemetry is disabled. Else contains OpenTelemetry data. - self._opentelemetry_data: SubscribeOpenTelemetry = None + self._opentelemetry_data: Optional[SubscribeOpenTelemetry] = None def __repr__(self): # Get an abbreviated version of the data. From 1b2020df6286ea4d7dc7a6f0134b1062c159a7df Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Thu, 19 Sep 2024 18:54:40 +0000 Subject: [PATCH 20/56] Add modack events for lease management and receipt modack --- .../pubsub_v1/subscriber/_protocol/leaser.py | 13 +++++- .../subscriber/_protocol/requests.py | 1 + .../_protocol/streaming_pull_manager.py | 28 +++++++++---- .../subscriber/test_streaming_pull_manager.py | 41 ++++++++++++++++++- 4 files changed, 73 insertions(+), 10 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 16018e384..32f37fea0 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -23,6 +23,9 @@ from typing import Dict, Iterable, Optional, Union from google.cloud.pubsub_v1.subscriber._protocol.dispatcher import _MAX_BATCH_LATENCY +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) try: from collections.abc import KeysView @@ -50,6 +53,7 @@ class _LeasedMessage(typing.NamedTuple): size: int ordering_key: Optional[str] + opentelemetry_data: Optional[SubscribeOpenTelemetry] class Leaser(object): @@ -98,6 +102,7 @@ def add(self, items: Iterable[requests.LeaseRequest]) -> None: sent_time=float("inf"), size=item.byte_size, ordering_key=item.ordering_key, + opentelemetry_data=item.opentelemetry_data, ) self._bytes += item.byte_size else: @@ -198,8 +203,14 @@ def maintain_leases(self) -> None: # is inactive. assert self._manager.dispatcher is not None ack_id_gen = (ack_id for ack_id in ack_ids) + opentelemetry_data = [ + message.opentelemetry_data + for message in list(leased_messages.values()) + ] expired_ack_ids = self._manager._send_lease_modacks( - ack_id_gen, deadline + ack_id_gen, + deadline, + opentelemetry_data, ) start_time = time.time() diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index e1ec8b6a9..6fd35896b 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -44,6 +44,7 @@ class LeaseRequest(NamedTuple): ack_id: str byte_size: int ordering_key: Optional[str] + opentelemetry_data: Optional[SubscribeOpenTelemetry] = None class ModAckRequest(NamedTuple): diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 1184f5a51..ffcb2539f 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1010,7 +1010,11 @@ def _get_initial_request( return request def _send_lease_modacks( - self, ack_ids: Iterable[str], ack_deadline: float, warn_on_invalid=True + self, + ack_ids: Iterable[str], + ack_deadline: float, + opentelemetry_data: List[SubscribeOpenTelemetry], + warn_on_invalid=True, ) -> Set[str]: exactly_once_enabled = False with self._exactly_once_enabled_lock: @@ -1020,6 +1024,12 @@ def _send_lease_modacks( requests.ModAckRequest(ack_id, ack_deadline, futures.Future()) for ack_id in ack_ids ] + if self._client.open_telemetry_enabled: + for item, data in zip( + items, opentelemetry_data + ): # pragma: NO COVER # Identical code covered in the same function below + data.add_subscribe_span_event("modack start") + item._replace(opentelemetry_data=data) assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) @@ -1046,6 +1056,10 @@ def _send_lease_modacks( requests.ModAckRequest(ack_id, self.ack_deadline, None) for ack_id in ack_ids ] + if self._client.open_telemetry_enabled: + for item, data in zip(items, opentelemetry_data): + data.add_subscribe_span_event("modack start") + item._replace(opentelemetry_data=data) assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) return set() @@ -1113,16 +1127,13 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: # Immediately (i.e. without waiting for the auto lease management) # modack the messages we received, as this tells the server that we've # received them. - if self._client.open_telemetry_enabled: - for opentelemetry_data in subscribe_opentelemetry: - opentelemetry_data.add_subscribe_span_event("modack start") ack_id_gen = (message.ack_id for message in received_messages) expired_ack_ids = self._send_lease_modacks( - ack_id_gen, self.ack_deadline, warn_on_invalid=False + ack_id_gen, + self.ack_deadline, + subscribe_opentelemetry, + warn_on_invalid=False, ) - if self._client.open_telemetry_enabled: - for opentelemetry_data in subscribe_opentelemetry: - opentelemetry_data.add_subscribe_span_event("modack end") with self._pause_resume_lock: if self._scheduler is None or self._leaser is None: @@ -1153,6 +1164,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: ack_id=message.ack_id, byte_size=message.size, ordering_key=message.ordering_key, + opentelemetry_data=message.opentelemetry_data, ) self._leaser.add([req]) diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 7774a805f..042178866 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -19,6 +19,10 @@ import time import types as stdlib_types +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) + # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: import mock @@ -579,6 +583,42 @@ def test__maybe_release_messages_negative_on_hold_bytes_warning(caplog): assert manager._on_hold_bytes == 0 # should be auto-corrected +def test_opentelemetry__send_lease_modacks(span_exporter): + manager, _, _, _, _, _ = make_running_manager(enable_open_telemetry=True) + data1 = SubscribeOpenTelemetry( + message=gapic_types.PubsubMessage(data=b"foo", message_id="1") + ) + data2 = SubscribeOpenTelemetry( + message=gapic_types.PubsubMessage(data=b"bar", message_id="2") + ) + + data1.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id1", + delivery_attempt=2, + ) + data2.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id1", + delivery_attempt=2, + ) + manager._send_lease_modacks( + ack_ids=["ack_id1", "ack_id2"], + ack_deadline=20, + opentelemetry_data=[data1, data2], + ) + data1.end_subscribe_span() + data2.end_subscribe_span() + spans = span_exporter.get_finished_spans() + assert len(spans) == 2 + + for span in spans: + assert len(span.events) == 1 + assert span.events[0].name == "modack start" + + def test_send_unary_ack(): manager = make_manager() @@ -1240,7 +1280,6 @@ def make_running_manager( manager._dispatcher = mock.create_autospec(dispatcher.Dispatcher, instance=True) manager._leaser = mock.create_autospec(leaser.Leaser, instance=True) manager._heartbeater = mock.create_autospec(heartbeater.Heartbeater, instance=True) - return ( manager, manager._consumer, From 26a86ec83d4261a43320b0a802e628370d164ab7 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Thu, 19 Sep 2024 22:37:34 +0000 Subject: [PATCH 21/56] Add drop event to subscribe span --- google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py | 4 ++++ google/cloud/pubsub_v1/subscriber/_protocol/leaser.py | 5 ++++- google/cloud/pubsub_v1/subscriber/_protocol/requests.py | 1 + .../pubsub_v1/subscriber/_protocol/streaming_pull_manager.py | 2 +- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 04bfda5cf..ddd0df09c 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -316,6 +316,10 @@ def drop( Args: items: The items to drop. """ + for item in items: + if item.opentelemetry_data: + item.opentelemetry_data.set_subscribe_span_result("dropped") + item.opentelemetry_data.end_subscribe_span() assert self._manager.leaser is not None self._manager.leaser.remove(items) ordering_keys = (k.ordering_key for k in items if k.ordering_key) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 32f37fea0..d9b8319cd 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -170,7 +170,9 @@ def maintain_leases(self) -> None: # and allow the Pub/Sub server to resend them. cutoff = time.time() - self._manager.flow_control.max_lease_duration to_drop = [ - requests.DropRequest(ack_id, item.size, item.ordering_key) + requests.DropRequest( + ack_id, item.size, item.ordering_key, item.opentelemetry_data + ) for ack_id, item in leased_messages.items() if item.sent_time < cutoff ] @@ -223,6 +225,7 @@ def maintain_leases(self) -> None: ack_id, leased_messages.get(ack_id).size, # type: ignore leased_messages.get(ack_id).ordering_key, # type: ignore + leased_messages.get(ack_id).opentelemetry_data, # type: ignore ) for ack_id in expired_ack_ids if ack_id in leased_messages diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index 6fd35896b..7736e4456 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -38,6 +38,7 @@ class DropRequest(NamedTuple): ack_id: str byte_size: int ordering_key: Optional[str] + opentelemetry_data: Optional[SubscribeOpenTelemetry] = None class LeaseRequest(NamedTuple): diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index ffcb2539f..afa7f7473 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1013,7 +1013,7 @@ def _send_lease_modacks( self, ack_ids: Iterable[str], ack_deadline: float, - opentelemetry_data: List[SubscribeOpenTelemetry], + opentelemetry_data: List[Optional[SubscribeOpenTelemetry]], warn_on_invalid=True, ) -> Set[str]: exactly_once_enabled = False From dbe1e504e6ee301f5e80610111c49ab840ab6ab6 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 16:10:28 +0000 Subject: [PATCH 22/56] Fix mypy errors --- google/cloud/pubsub_v1/subscriber/_protocol/leaser.py | 4 ++++ .../pubsub_v1/subscriber/_protocol/streaming_pull_manager.py | 4 +++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index d9b8319cd..70b149c4f 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -208,7 +208,11 @@ def maintain_leases(self) -> None: opentelemetry_data = [ message.opentelemetry_data for message in list(leased_messages.values()) + if message.opentelemetry_data ] + # for message in list(leased_messages.values()): + # if message.opentelemetry_data: + # opentelemetry_data.append(message.opentelemetry_data) expired_ack_ids = self._manager._send_lease_modacks( ack_id_gen, deadline, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index afa7f7473..82a2371d9 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1013,7 +1013,7 @@ def _send_lease_modacks( self, ack_ids: Iterable[str], ack_deadline: float, - opentelemetry_data: List[Optional[SubscribeOpenTelemetry]], + opentelemetry_data: List[SubscribeOpenTelemetry], warn_on_invalid=True, ) -> Set[str]: exactly_once_enabled = False @@ -1028,6 +1028,7 @@ def _send_lease_modacks( for item, data in zip( items, opentelemetry_data ): # pragma: NO COVER # Identical code covered in the same function below + assert data is not None data.add_subscribe_span_event("modack start") item._replace(opentelemetry_data=data) @@ -1058,6 +1059,7 @@ def _send_lease_modacks( ] if self._client.open_telemetry_enabled: for item, data in zip(items, opentelemetry_data): + assert data is not None data.add_subscribe_span_event("modack start") item._replace(opentelemetry_data=data) assert self._dispatcher is not None From ef0c39110dbf95d16b3a25e42733e889a0dcf391 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 17:58:19 +0000 Subject: [PATCH 23/56] Add subscriber concurrency control span --- .../open_telemetry/subscribe_opentelemetry.py | 20 ++++ .../_protocol/streaming_pull_manager.py | 4 + .../subscriber/test_streaming_pull_manager.py | 103 ++++++++++++++++++ .../test_subscribe_opentelemetry.py | 14 +++ 4 files changed, 141 insertions(+) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index f30cda5de..0e210e14a 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -17,6 +17,7 @@ from opentelemetry import trace from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from opentelemetry.trace.propagation import set_span_in_context from google.cloud.pubsub_v1.open_telemetry.context_propagation import ( OpenTelemetryContextGetter, @@ -35,6 +36,10 @@ def __init__(self, message: PubsubMessage): # method. self._subscribe_span: Optional[trace.Span] = None + # subscriber concurrency control span will be initialized by the + # `start_subscribe_concurrency_control_span` method. + self._concurrency_control_span: Optional[trace.Span] = None + def start_subscribe_span( self, subscription: str, @@ -88,3 +93,18 @@ def set_subscribe_span_result(self, result: str) -> None: key="messaging.gcp_pubsub.result", value=result, ) + + def start_subscribe_concurrency_control_span(self) -> None: + assert self._subscribe_span is not None + tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name="subscriber concurrency control", + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(self._subscribe_span), + end_on_exit=False, + ) as concurrency_control_span: + self._concurrency_control_span = concurrency_control_span + + def end_subscribe_concurrency_control_span(self) -> None: + assert self._concurrency_control_span is not None + self._concurrency_control_span.end() diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 82a2371d9..682569e77 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -126,6 +126,8 @@ def _wrap_callback_errors( message: The Pub/Sub message. """ try: + if message.opentelemetry_data: + message.opentelemetry_data.end_subscribe_concurrency_control_span() callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds @@ -621,6 +623,8 @@ def _schedule_message_on_hold( ) assert self._scheduler is not None assert self._callback is not None + if msg.opentelemetry_data: + msg.opentelemetry_data.start_subscribe_concurrency_control_span() self._scheduler.schedule(self._callback, msg) def send_unary_ack( diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 042178866..43c35cf57 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -18,10 +18,18 @@ import threading import time import types as stdlib_types +import datetime +import queue + +from google.protobuf import timestamp_pb2 +from google.api_core import datetime_helpers from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( SubscribeOpenTelemetry, ) +from google.cloud.pubsub_v1.subscriber.message import Message +from google.cloud.pubsub_v1.types import PubsubMessage + # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -2705,3 +2713,98 @@ def test_opentelemetry__on_response_subscribe_span_create(span_exporter): # Subscribe span is still active, hence unexported. assert len(spans) == 0 + + +RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) +RECEIVED_SECONDS = datetime_helpers.to_milliseconds(RECEIVED) // 1000 +PUBLISHED_MICROS = 123456 +PUBLISHED = RECEIVED + datetime.timedelta(days=1, microseconds=PUBLISHED_MICROS) +PUBLISHED_SECONDS = datetime_helpers.to_milliseconds(PUBLISHED) // 1000 + + +def create_message( + data, + ack_id="ACKID", + delivery_attempt=0, + ordering_key="", + exactly_once_delivery_enabled=False, + **attrs, +): # pragma: NO COVER + with mock.patch.object(time, "time") as time_: + time_.return_value = RECEIVED_SECONDS + gapic_pubsub_message = PubsubMessage( + attributes=attrs, + data=data, + message_id="message_id", + publish_time=timestamp_pb2.Timestamp( + seconds=PUBLISHED_SECONDS, nanos=PUBLISHED_MICROS * 1000 + ), + ordering_key=ordering_key, + ) + msg = Message( + # The code under test uses a raw protobuf PubsubMessage, i.e. w/o additional + # Python class wrappers, hence the "_pb" + message=gapic_pubsub_message._pb, + ack_id=ack_id, + delivery_attempt=delivery_attempt, + request_queue=queue.Queue(), + exactly_once_delivery_enabled_func=lambda: exactly_once_delivery_enabled, + ) + return msg + + +def test_opentelemetry_subscriber_concurrency_control_span(span_exporter): + manager, _, _, leaser, _, _ = make_running_manager( + enable_open_telemetry=True, + subscription_name="projects/projectID/subscriptions/subscriptionID", + ) + manager._callback = mock.Mock() + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + msg.opentelemetry_data = opentelemetry_data + manager._schedule_message_on_hold(msg) + opentelemetry_data.end_subscribe_concurrency_control_span() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + assert len(spans) == 2 + + concurrency_control_span, subscribe_span = spans + assert concurrency_control_span.name == "subscriber concurrency control" + assert subscribe_span.name == "subscriptionID subscribe" + + assert concurrency_control_span.parent == subscribe_span.context + + +def test_opentelemetry_subscriber_concurrency_control_span_end(span_exporter): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + opentelemetry_data.start_subscribe_concurrency_control_span() + msg.opentelemetry_data = opentelemetry_data + streaming_pull_manager._wrap_callback_errors(mock.Mock(), mock.Mock(), msg) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + + concurrency_control_span = spans[0] + concurrency_control_span.name == "subscriber concurrency control" + + +def test_opentelemetry_wrap_callback_error(span_exporter): + msg = create_message(b"foo") + streaming_pull_manager._wrap_callback_errors(mock.Mock(), mock.Mock(), msg) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index 0271cb456..7d0d5a9fc 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -96,3 +96,17 @@ def test_opentelemetry_set_subscribe_span_result_assert_error(): opentelemetry_data = SubscribeOpenTelemetry(msg) with pytest.raises(AssertionError): opentelemetry_data.set_subscribe_span_result("hi") + + +def test_opentelemetry_start_subscribe_concurrency_control_span_no_subscribe_span(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.start_subscribe_concurrency_control_span() + + +def test_opentelemetry_end_subscribe_concurrency_control_span_assertion_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.end_subscribe_concurrency_control_span() From b71cf157056d26f1c7c76da58d20ee154a48c6e1 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 19:01:02 +0000 Subject: [PATCH 24/56] Add subscribe scheduler span --- .../open_telemetry/subscribe_opentelemetry.py | 19 +++++++++++ .../subscriber/_protocol/messages_on_hold.py | 2 ++ .../subscriber/test_messages_on_hold.py | 33 ++++++++++++++++++- .../test_subscribe_opentelemetry.py | 14 ++++++++ 4 files changed, 67 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 0e210e14a..3bbebdcf9 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -40,6 +40,10 @@ def __init__(self, message: PubsubMessage): # `start_subscribe_concurrency_control_span` method. self._concurrency_control_span: Optional[trace.Span] = None + # scheduler span will be initialized by the + # `start_subscribe_scheduler_span` method. + self._scheduler_span: Optional[trace.Span] = None + def start_subscribe_span( self, subscription: str, @@ -108,3 +112,18 @@ def start_subscribe_concurrency_control_span(self) -> None: def end_subscribe_concurrency_control_span(self) -> None: assert self._concurrency_control_span is not None self._concurrency_control_span.end() + + def start_subscribe_scheduler_span(self) -> None: + assert self._subscribe_span is not None + tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name="subscriber scheduler", + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(self._subscribe_span), + end_on_exit=False, + ) as scheduler_span: + self._scheduler_span = scheduler_span + + def end_subscribe_scheduler_span(self) -> None: + assert self._scheduler_span is not None + self._scheduler_span.end() diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py index 63c2edbfa..3d4c2a392 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/messages_on_hold.py @@ -100,6 +100,8 @@ def put(self, message: "subscriber.message.Message") -> None: Args: message: The message to put on hold. """ + if message.opentelemetry_data: + message.opentelemetry_data.start_subscribe_scheduler_span() self._messages_on_hold.append(message) self._size = self._size + 1 diff --git a/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py b/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py index 5e1dcf91b..64963de48 100644 --- a/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py +++ b/tests/unit/pubsub_v1/subscriber/test_messages_on_hold.py @@ -14,9 +14,14 @@ import queue +from opentelemetry import trace + +from google.pubsub_v1 import types as gapic_types from google.cloud.pubsub_v1.subscriber import message from google.cloud.pubsub_v1.subscriber._protocol import messages_on_hold -from google.pubsub_v1 import types as gapic_types +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) def make_message(ack_id, ordering_key): @@ -37,6 +42,32 @@ def test_init(): assert moh.get() is None +def test_opentelemetry_subscriber_scheduler_span(span_exporter): + moh = messages_on_hold.MessagesOnHold() + msg = make_message(ack_id="ack1", ordering_key="") + opentelemetry_data = SubscribeOpenTelemetry(msg) + msg.opentelemetry_data = opentelemetry_data + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + moh.put(msg) + opentelemetry_data.end_subscribe_scheduler_span() + opentelemetry_data.end_subscribe_span() + + spans = span_exporter.get_finished_spans() + + assert len(spans) == 2 + + subscribe_scheduler_span, subscribe_span = spans + + assert subscribe_scheduler_span.name == "subscriber scheduler" + assert subscribe_scheduler_span.kind == trace.SpanKind.INTERNAL + assert subscribe_scheduler_span.parent == subscribe_span.context + + def test_put_and_get_unordered_messages(): moh = messages_on_hold.MessagesOnHold() diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index 7d0d5a9fc..aad87ebdf 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -110,3 +110,17 @@ def test_opentelemetry_end_subscribe_concurrency_control_span_assertion_error(): opentelemetry_data = SubscribeOpenTelemetry(msg) with pytest.raises(AssertionError): opentelemetry_data.end_subscribe_concurrency_control_span() + + +def test_opentelemetry_start_subscribe_scheduler_span_assertion_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.start_subscribe_scheduler_span() + + +def test_opentelemetry_end_subscribe_scheduler_span_assertion_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.end_subscribe_scheduler_span() From da797cf4b817de4c2ef2c54fecf4118cf75bc09e Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 19:34:39 +0000 Subject: [PATCH 25/56] End subscribe scheduler span --- .../_protocol/streaming_pull_manager.py | 3 +- .../subscriber/test_streaming_pull_manager.py | 44 ++++++++++++++++++- 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 682569e77..4b3df6df0 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -587,7 +587,8 @@ def _maybe_release_messages(self) -> None: msg = self._messages_on_hold.get() if not msg: break - + if msg.opentelemetry_data: + msg.opentelemetry_data.end_subscribe_scheduler_span() self._schedule_message_on_hold(msg) released_ack_ids.append(msg.ack_id) diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 43c35cf57..eae8cfdaa 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -21,6 +21,7 @@ import datetime import queue +from opentelemetry import trace from google.protobuf import timestamp_pb2 from google.api_core import datetime_helpers @@ -526,6 +527,45 @@ def test__maybe_release_messages_on_overload(): manager._scheduler.schedule.assert_not_called() +def test_opentelemetry__maybe_release_messages_subscribe_scheduler_span(span_exporter): + manager = make_manager( + flow_control=types.FlowControl(max_messages=10, max_bytes=1000) + ) + manager._callback = mock.sentinel.callback + + # Init leaser message count to 11, so that when subtracting the 3 messages + # that are on hold, there is still room for another 2 messages before the + # max load is hit. + _leaser = manager._leaser = mock.create_autospec(leaser.Leaser) + fake_leaser_add(_leaser, init_msg_count=8, assumed_msg_size=10) + msg = mock.create_autospec( + message.Message, instance=True, ack_id="ack_foo", size=10 + ) + msg.message_id = 3 + opentelemetry_data = SubscribeOpenTelemetry(msg) + msg.opentelemetry_data = opentelemetry_data + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + manager._messages_on_hold.put(msg) + manager._maybe_release_messages() + opentelemetry_data.end_subscribe_span() + spans = span_exporter.get_finished_spans() + + assert len(spans) == 2 + + subscriber_scheduler_span, subscribe_span = spans + + assert subscriber_scheduler_span.name == "subscriber scheduler" + assert subscribe_span.name == "subscriptionID subscribe" + + assert subscriber_scheduler_span.parent == subscribe_span.context + assert subscriber_scheduler_span.kind == trace.SpanKind.INTERNAL + + def test__maybe_release_messages_below_overload(): manager = make_manager( flow_control=types.FlowControl(max_messages=10, max_bytes=1000) @@ -2712,7 +2752,9 @@ def test_opentelemetry__on_response_subscribe_span_create(span_exporter): spans = span_exporter.get_finished_spans() # Subscribe span is still active, hence unexported. - assert len(spans) == 0 + # Subscriber scheduler spans corresponding to the two messages would be started in `messages_on_hold.put()`` + # and ended in `_maybe_release_messages` + assert len(spans) == 2 RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) From a00505764c87c693a0f88a8f53dd8983b5301115 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 20:09:18 +0000 Subject: [PATCH 26/56] Add subscription_id property to SubscribeOpenTelemetry * This field will then be used for span names of spans like Process Span --- .../pubsub_v1/open_telemetry/subscribe_opentelemetry.py | 9 +++++++++ .../pubsub_v1/subscriber/test_streaming_pull_manager.py | 1 + 2 files changed, 10 insertions(+) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 3bbebdcf9..1ec142077 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -44,6 +44,14 @@ def __init__(self, message: PubsubMessage): # `start_subscribe_scheduler_span` method. self._scheduler_span: Optional[trace.Span] = None + # This will be set by `start_subscribe_span` method and will be used + # for other spans, such as + self._subscription_id: Optional[str] = None + + @property + def subscription_id(self): + return self._subscription_id + def start_subscribe_span( self, subscription: str, @@ -58,6 +66,7 @@ def start_subscribe_span( ) assert len(subscription.split("/")) == 4 subscription_short_name = subscription.split("/")[3] + self._subscription_id = subscription_short_name with tracer.start_as_current_span( name=f"{subscription_short_name} subscribe", context=parent_span_context if parent_span_context else None, diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index eae8cfdaa..8bbf4b860 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -2820,6 +2820,7 @@ def test_opentelemetry_subscriber_concurrency_control_span(span_exporter): concurrency_control_span, subscribe_span = spans assert concurrency_control_span.name == "subscriber concurrency control" assert subscribe_span.name == "subscriptionID subscribe" + assert opentelemetry_data.subscription_id == "subscriptionID" assert concurrency_control_span.parent == subscribe_span.context From 47c9d9ac3cd948086cce22a214d31a96f578eb05 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 22:12:00 +0000 Subject: [PATCH 27/56] Start process span --- .../open_telemetry/subscribe_opentelemetry.py | 25 ++++++++++++++++++- .../_protocol/streaming_pull_manager.py | 1 + .../test_subscribe_opentelemetry.py | 7 ++++++ 3 files changed, 32 insertions(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 1ec142077..96f2c53e5 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -45,9 +45,12 @@ def __init__(self, message: PubsubMessage): self._scheduler_span: Optional[trace.Span] = None # This will be set by `start_subscribe_span` method and will be used - # for other spans, such as + # for other spans, such as process span. self._subscription_id: Optional[str] = None + # This will be set by `start_process_span` method. + self._process_span: Optional[trace.Span] = None + @property def subscription_id(self): return self._subscription_id @@ -136,3 +139,23 @@ def start_subscribe_scheduler_span(self) -> None: def end_subscribe_scheduler_span(self) -> None: assert self._scheduler_span is not None self._scheduler_span.end() + + def start_process_span(self) -> None: + assert self._subscribe_span is not None + tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + publish_create_span_link: Optional[trace.Link] = ( + trace.Link(self._subscribe_span.parent) + if self._subscribe_span.parent + else None + ) + with tracer.start_as_current_span( + name=f"{self._subscription_id} process", + attributes={ + "messaging.system": self._OPEN_TELEMETRY_MESSAGING_SYSTEM, + }, + kind=trace.SpanKind.INTERNAL, + context=set_span_in_context(self._subscribe_span), + links=[publish_create_span_link] if publish_create_span_link else None, + end_on_exit=False, + ) as process_span: + self._process_span = process_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index 4b3df6df0..cc815f114 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -128,6 +128,7 @@ def _wrap_callback_errors( try: if message.opentelemetry_data: message.opentelemetry_data.end_subscribe_concurrency_control_span() + message.opentelemetry_data.start_process_span() callback(message) except BaseException as exc: # Note: the likelihood of this failing is extremely low. This just adds diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index aad87ebdf..8a5542493 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -124,3 +124,10 @@ def test_opentelemetry_end_subscribe_scheduler_span_assertion_error(): opentelemetry_data = SubscribeOpenTelemetry(msg) with pytest.raises(AssertionError): opentelemetry_data.end_subscribe_scheduler_span() + + +def test_opentelemetry_start_process_span_assertion_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.start_process_span() From 8faa3cfbf8b71fad29ff4ba83d4f3a85f6683869 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 22:18:47 +0000 Subject: [PATCH 28/56] Add end process span to SubcribeOpenTelemetry --- .../pubsub_v1/open_telemetry/subscribe_opentelemetry.py | 4 ++++ .../pubsub_v1/subscriber/test_subscribe_opentelemetry.py | 7 +++++++ 2 files changed, 11 insertions(+) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 96f2c53e5..c0706d167 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -159,3 +159,7 @@ def start_process_span(self) -> None: end_on_exit=False, ) as process_span: self._process_span = process_span + + def end_process_span(self) -> None: + assert self._process_span is not None + self._process_span.end() diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index 8a5542493..2689f6b2b 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -131,3 +131,10 @@ def test_opentelemetry_start_process_span_assertion_error(): opentelemetry_data = SubscribeOpenTelemetry(msg) with pytest.raises(AssertionError): opentelemetry_data.start_process_span() + + +def test_opentelemetry_end_process_span_assertion_error(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + with pytest.raises(AssertionError): + opentelemetry_data.end_process_span() From 4189b226a3aa425f24f5d8040bdb57184c0de00a Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 22:46:04 +0000 Subject: [PATCH 29/56] End process span when ack and nack occur --- .../open_telemetry/subscribe_opentelemetry.py | 9 ++++ google/cloud/pubsub_v1/subscriber/message.py | 8 +++ .../unit/pubsub_v1/subscriber/test_message.py | 53 ++++++++++++++----- 3 files changed, 58 insertions(+), 12 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index c0706d167..e911b9302 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -163,3 +163,12 @@ def start_process_span(self) -> None: def end_process_span(self) -> None: assert self._process_span is not None self._process_span.end() + + def add_process_span_event(self, event: str) -> None: + assert self._process_span is not None + self._process_span.add_event( + name=event, + attributes={ + "timestamp": str(datetime.now()), + }, + ) diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index c9b2effd7..2c90109b8 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -270,6 +270,8 @@ def ack(self) -> None: """ if self.opentelemetry_data: self.opentelemetry_data.add_subscribe_span_event("ack start") + self.opentelemetry_data.add_process_span_event("ack called") + self.opentelemetry_data.end_process_span() time_to_ack = math.ceil(time.time() - self._received_timestamp) self._request_queue.put( requests.AckRequest( @@ -323,6 +325,8 @@ def ack_with_response(self) -> "futures.Future": """ if self.opentelemetry_data: self.opentelemetry_data.add_subscribe_span_event("ack start") + self.opentelemetry_data.add_process_span_event("ack called") + self.opentelemetry_data.end_process_span() req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() @@ -465,6 +469,8 @@ def nack(self) -> None: """ if self.opentelemetry_data: self.opentelemetry_data.add_subscribe_span_event("nack start") + self.opentelemetry_data.add_process_span_event("nack called") + self.opentelemetry_data.end_process_span() self._request_queue.put( requests.NackRequest( ack_id=self._ack_id, @@ -511,6 +517,8 @@ def nack_with_response(self) -> "futures.Future": """ if self.opentelemetry_data: self.opentelemetry_data.add_subscribe_span_event("nack start") + self.opentelemetry_data.add_process_span_event("nack called") + self.opentelemetry_data.end_process_span() req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 236453bac..7b4720ef6 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -144,15 +144,22 @@ def test_opentelemetry_ack(span_exporter): ack_id="ack_id", delivery_attempt=2, ) + opentelemetry_data.start_process_span() msg.opentelemetry_data = opentelemetry_data msg.ack() opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() - assert len(spans) == 1 + assert len(spans) == 2 + process_span, subscribe_span = spans - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "ack start" + assert subscribe_span.name == "subscriptionID subscribe" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "ack start" + + assert process_span.name == "subscriptionID process" + assert len(process_span.events) == 1 + assert process_span.events[0].name == "ack called" def test_opentelemetry_ack_with_response(span_exporter): @@ -165,15 +172,22 @@ def test_opentelemetry_ack_with_response(span_exporter): ack_id="ack_id", delivery_attempt=2, ) + opentelemetry_data.start_process_span() msg.opentelemetry_data = opentelemetry_data msg.ack_with_response() opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() - assert len(spans) == 1 + assert len(spans) == 2 + process_span, subscribe_span = spans - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "ack start" + assert subscribe_span.name == "subscriptionID subscribe" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "ack start" + + assert process_span.name == "subscriptionID process" + assert len(process_span.events) == 1 + assert process_span.events[0].name == "ack called" def test_opentelemetry_nack(span_exporter): @@ -186,15 +200,22 @@ def test_opentelemetry_nack(span_exporter): ack_id="ack_id", delivery_attempt=2, ) + opentelemetry_data.start_process_span() msg.opentelemetry_data = opentelemetry_data msg.nack() opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() - assert len(spans) == 1 + assert len(spans) == 2 + process_span, subscribe_span = spans - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "nack start" + assert subscribe_span.name == "subscriptionID subscribe" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "nack start" + + assert process_span.name == "subscriptionID process" + assert len(process_span.events) == 1 + assert process_span.events[0].name == "nack called" def test_opentelemetry_nack_with_response(span_exporter): @@ -207,15 +228,23 @@ def test_opentelemetry_nack_with_response(span_exporter): ack_id="ack_id", delivery_attempt=2, ) + opentelemetry_data.start_process_span() msg.opentelemetry_data = opentelemetry_data msg.nack_with_response() opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() - assert len(spans) == 1 + assert len(spans) == 2 - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "nack start" + process_span, subscribe_span = spans + + assert subscribe_span.name == "subscriptionID subscribe" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "nack start" + + assert process_span.name == "subscriptionID process" + assert len(process_span.events) == 1 + assert process_span.events[0].name == "nack called" def test_opentelemetry_modack(span_exporter): From 06ce105c5ade4bddc364b0862e7d4d70549d03cd Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Fri, 20 Sep 2024 23:13:31 +0000 Subject: [PATCH 30/56] End process span and add event when messages are dropped --- .../pubsub_v1/subscriber/_protocol/leaser.py | 12 ++- .../unit/pubsub_v1/subscriber/test_leaser.py | 99 +++++++++++++++++++ 2 files changed, 108 insertions(+), 3 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 70b149c4f..6bfb8f5b3 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -182,6 +182,10 @@ def maintain_leases(self) -> None: "Dropping %s items because they were leased too long.", len(to_drop) ) assert self._manager.dispatcher is not None + for drop_msg in to_drop: + if drop_msg.opentelemetry_data: + drop_msg.opentelemetry_data.add_process_span_event("dropped") + drop_msg.opentelemetry_data.end_process_span() self._manager.dispatcher.drop(to_drop) # Remove dropped items from our copy of the leased messages (they @@ -210,9 +214,6 @@ def maintain_leases(self) -> None: for message in list(leased_messages.values()) if message.opentelemetry_data ] - # for message in list(leased_messages.values()): - # if message.opentelemetry_data: - # opentelemetry_data.append(message.opentelemetry_data) expired_ack_ids = self._manager._send_lease_modacks( ack_id_gen, deadline, @@ -223,6 +224,11 @@ def maintain_leases(self) -> None: # If exactly once delivery is enabled, we should drop all expired ack_ids from lease management. if self._manager._exactly_once_delivery_enabled() and len(expired_ack_ids): assert self._manager.dispatcher is not None + for ack_id in expired_ack_ids: + msg = leased_messages.get(ack_id) + if msg.opentelemetry_data: + msg.opentelemetry_data.add_process_span_event("dropped") + msg.opentelemetry_data.end_process_span() self._manager.dispatcher.drop( [ requests.DropRequest( diff --git a/tests/unit/pubsub_v1/subscriber/test_leaser.py b/tests/unit/pubsub_v1/subscriber/test_leaser.py index f38717c6f..275744bd9 100644 --- a/tests/unit/pubsub_v1/subscriber/test_leaser.py +++ b/tests/unit/pubsub_v1/subscriber/test_leaser.py @@ -22,6 +22,10 @@ from google.cloud.pubsub_v1.subscriber._protocol import leaser from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber._protocol import streaming_pull_manager +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + SubscribeOpenTelemetry, +) +from google.cloud.pubsub_v1.subscriber import message # special case python < 3.8 if sys.version_info.major == 3 and sys.version_info.minor < 8: @@ -136,6 +140,101 @@ def trigger_done(timeout): leaser._stop_event.wait = trigger_done +def test_opentelemetry_dropped_message_process_span(span_exporter): + manager = create_manager() + leaser_ = leaser.Leaser(manager) + make_sleep_mark_event_as_done(leaser_) + msg = mock.create_autospec( + message.Message, instance=True, ack_id="ack_foo", size=10 + ) + msg.message_id = 3 + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + opentelemetry_data.start_process_span() + leaser_.add( + [ + requests.LeaseRequest( + ack_id="my ack id", + byte_size=50, + ordering_key="", + opentelemetry_data=opentelemetry_data, + ) + ] + ) + leased_messages_dict = leaser_._leased_messages + + # Setting the `sent_time`` to be less than `cutoff` in order to make the leased message expire. + # This will exercise the code path where the message would be dropped from the leaser + leased_messages_dict["my ack id"] = leased_messages_dict["my ack id"]._replace( + sent_time=0 + ) + + manager._send_lease_modacks.return_value = set() + leaser_.maintain_leases() + + opentelemetry_data.end_subscribe_span() + spans = span_exporter.get_finished_spans() + assert len(spans) == 2 + process_span, subscribe_span = spans + + assert process_span.name == "subscriptionID process" + assert subscribe_span.name == "subscriptionID subscribe" + + assert len(process_span.events) == 1 + assert process_span.events[0].name == "dropped" + + assert process_span.parent == subscribe_span.context + + +def test_opentelemetry_expired_message_exactly_once_process_span(span_exporter): + manager = create_manager() + leaser_ = leaser.Leaser(manager) + make_sleep_mark_event_as_done(leaser_) + msg = mock.create_autospec( + message.Message, instance=True, ack_id="ack_foo", size=10 + ) + msg.message_id = 3 + opentelemetry_data = SubscribeOpenTelemetry(msg) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=4, + ) + opentelemetry_data.start_process_span() + leaser_.add( + [ + requests.LeaseRequest( + ack_id="my ack id", + byte_size=50, + ordering_key="", + opentelemetry_data=opentelemetry_data, + ) + ] + ) + + manager._send_lease_modacks.return_value = ["my ack id"] + leaser_.maintain_leases() + + opentelemetry_data.end_subscribe_span() + spans = span_exporter.get_finished_spans() + assert len(spans) == 2 + process_span, subscribe_span = spans + + assert process_span.name == "subscriptionID process" + assert subscribe_span.name == "subscriptionID subscribe" + + assert len(process_span.events) == 1 + assert process_span.events[0].name == "dropped" + + assert process_span.parent == subscribe_span.context + + def test_maintain_leases_ack_ids(): manager = create_manager() leaser_ = leaser.Leaser(manager) From 5fa47736865f3b61559742d7c4e781dd28fac278 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sat, 21 Sep 2024 21:55:49 +0000 Subject: [PATCH 31/56] Fix the trace.Link from process span to publish span --- .../open_telemetry/subscribe_opentelemetry.py | 25 ++++++++++++++----- 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index e911b9302..64d915468 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -15,7 +15,7 @@ from typing import Optional from datetime import datetime -from opentelemetry import trace +from opentelemetry import trace, context from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator from opentelemetry.trace.propagation import set_span_in_context @@ -51,6 +51,11 @@ def __init__(self, message: PubsubMessage): # This will be set by `start_process_span` method. self._process_span: Optional[trace.Span] = None + # This will be set by `start_subscribe_span` method, if a publisher create span + # context was extracted from trace propagation. And will be used by spans like + # proces span to add links to the publisher create span. + self._publisher_create_span_context: Optional[context.Context] = None + @property def subscription_id(self): return self._subscription_id @@ -67,6 +72,7 @@ def start_subscribe_span( carrier=self._message, getter=OpenTelemetryContextGetter(), ) + self._publisher_create_span_context = parent_span_context assert len(subscription.split("/")) == 4 subscription_short_name = subscription.split("/")[3] self._subscription_id = subscription_short_name @@ -143,11 +149,18 @@ def end_subscribe_scheduler_span(self) -> None: def start_process_span(self) -> None: assert self._subscribe_span is not None tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) - publish_create_span_link: Optional[trace.Link] = ( - trace.Link(self._subscribe_span.parent) - if self._subscribe_span.parent - else None - ) + publish_create_span_link: Optional[trace.Link] = None + if self._publisher_create_span_context: + publish_create_span: trace.Span = trace.get_current_span( + self._publisher_create_span_context + ) + span_context: Optional[ + trace.SpanContext + ] = publish_create_span.get_span_context() + publish_create_span_link = ( + trace.Link(span_context) if span_context else None + ) + with tracer.start_as_current_span( name=f"{self._subscription_id} process", attributes={ From f30c2c42409c73e68a700b97ddcb7f8e293b4f5f Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sat, 21 Sep 2024 22:00:18 +0000 Subject: [PATCH 32/56] Add null check before checking if Subscriber message has opentelemetry data --- google/cloud/pubsub_v1/subscriber/_protocol/leaser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 6bfb8f5b3..bfb76a9a3 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -226,7 +226,7 @@ def maintain_leases(self) -> None: assert self._manager.dispatcher is not None for ack_id in expired_ack_ids: msg = leased_messages.get(ack_id) - if msg.opentelemetry_data: + if msg and msg.opentelemetry_data: msg.opentelemetry_data.add_process_span_event("dropped") msg.opentelemetry_data.end_process_span() self._manager.dispatcher.drop( From 3ae241837b3f4c64505b42043aa47352bb2e07e7 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sat, 21 Sep 2024 23:57:14 +0000 Subject: [PATCH 33/56] Add tests to verify process span links and subscribe span parent --- .../test_subscribe_opentelemetry.py | 49 +++++++++++++++++++ 1 file changed, 49 insertions(+) diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index 2689f6b2b..bc079bac2 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -20,6 +20,11 @@ from google.protobuf import timestamp_pb2 from google.api_core import datetime_helpers +from opentelemetry import trace +from opentelemetry.trace.propagation.tracecontext import TraceContextTextMapPropagator +from google.cloud.pubsub_v1.open_telemetry.context_propagation import ( + OpenTelemetryContextSetter, +) from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( SubscribeOpenTelemetry, @@ -138,3 +143,47 @@ def test_opentelemetry_end_process_span_assertion_error(): opentelemetry_data = SubscribeOpenTelemetry(msg) with pytest.raises(AssertionError): opentelemetry_data.end_process_span() + + +def test_opentelemetry_start_process_span_publisher_link(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + msg.opentelemetry_data = opentelemetry_data + tracer = trace.get_tracer("foo") + publisher_create_span = None + with tracer.start_as_current_span(name="name") as span: + publisher_create_span = span + TraceContextTextMapPropagator().inject( + carrier=msg._message, + setter=OpenTelemetryContextSetter(), + ) + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + opentelemetry_data.start_process_span() + assert len(opentelemetry_data._process_span.links) == 1 + assert ( + opentelemetry_data._process_span.links[0].context.span_id + == publisher_create_span.get_span_context().span_id + ) + + +def test_opentelemetry_start_process_span_no_publisher_span(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + msg.opentelemetry_data = opentelemetry_data + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + opentelemetry_data.start_process_span() + # Assert that when no context is propagated, the subscriber span has no parent. + assert opentelemetry_data._subscribe_span.parent is None + # Assert that when there is no publisher create span context propagated, + # There are no links created in the process span. + assert len(opentelemetry_data._process_span.links) == 0 From d1c870d591e90beacb2abc52564ac09d56c79925 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 02:59:07 +0000 Subject: [PATCH 34/56] Move adding dropped event and ending subscriber span from dispatcher.drop to leaser.maintain_leases * dispatcher.drop() is also called by ack, _retry_ack, nack and maintain_leases(when it wants to drop expired messages. * We only want to record the dropped event when the message is expired and hence dropped by the leaser --- .../subscriber/_protocol/dispatcher.py | 4 ---- .../pubsub_v1/subscriber/_protocol/leaser.py | 20 ++++++++++++------- .../subscriber/_protocol/requests.py | 1 - 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index ddd0df09c..04bfda5cf 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -316,10 +316,6 @@ def drop( Args: items: The items to drop. """ - for item in items: - if item.opentelemetry_data: - item.opentelemetry_data.set_subscribe_span_result("dropped") - item.opentelemetry_data.end_subscribe_span() assert self._manager.leaser is not None self._manager.leaser.remove(items) ordering_keys = (k.ordering_key for k in items if k.ordering_key) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index bfb76a9a3..bc702f87e 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -170,9 +170,7 @@ def maintain_leases(self) -> None: # and allow the Pub/Sub server to resend them. cutoff = time.time() - self._manager.flow_control.max_lease_duration to_drop = [ - requests.DropRequest( - ack_id, item.size, item.ordering_key, item.opentelemetry_data - ) + requests.DropRequest(ack_id, item.size, item.ordering_key) for ack_id, item in leased_messages.items() if item.sent_time < cutoff ] @@ -183,9 +181,16 @@ def maintain_leases(self) -> None: ) assert self._manager.dispatcher is not None for drop_msg in to_drop: - if drop_msg.opentelemetry_data: - drop_msg.opentelemetry_data.add_process_span_event("dropped") - drop_msg.opentelemetry_data.end_process_span() + leased_message = leased_messages.get(drop_msg.ack_id) + if leased_message and leased_message.opentelemetry_data: + leased_message.opentelemetry_data.add_process_span_event( + "dropped" + ) + leased_message.opentelemetry_data.end_process_span() + leased_message.opentelemetry_data.set_subscribe_span_result( + "dropped" + ) + leased_message.opentelemetry_data.end_subscribe_span() self._manager.dispatcher.drop(to_drop) # Remove dropped items from our copy of the leased messages (they @@ -229,13 +234,14 @@ def maintain_leases(self) -> None: if msg and msg.opentelemetry_data: msg.opentelemetry_data.add_process_span_event("dropped") msg.opentelemetry_data.end_process_span() + msg.opentelemetry_data.set_subscribe_span_result("dropped") + msg.opentelemetry_data.end_subscribe_span() self._manager.dispatcher.drop( [ requests.DropRequest( ack_id, leased_messages.get(ack_id).size, # type: ignore leased_messages.get(ack_id).ordering_key, # type: ignore - leased_messages.get(ack_id).opentelemetry_data, # type: ignore ) for ack_id in expired_ack_ids if ack_id in leased_messages diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py index 7736e4456..6fd35896b 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/requests.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/requests.py @@ -38,7 +38,6 @@ class DropRequest(NamedTuple): ack_id: str byte_size: int ordering_key: Optional[str] - opentelemetry_data: Optional[SubscribeOpenTelemetry] = None class LeaseRequest(NamedTuple): From cb1f97482a0903b80f5dc8b65cbe164dda0763a5 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 03:28:39 +0000 Subject: [PATCH 35/56] Move ack called subscribe span event from message.ack() to dispatcher.ack() where the ack RPC is made --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 13 +++++++++---- google/cloud/pubsub_v1/subscriber/message.py | 2 -- tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 10 ++++++---- tests/unit/pubsub_v1/subscriber/test_message.py | 6 ++---- 4 files changed, 17 insertions(+), 14 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 04bfda5cf..d20bfa223 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -232,7 +232,9 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: items_gen = iter(items) ack_ids_gen = (item.ack_id for item in items) total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE)) - + for item in items: + if item.opentelemetry_data: + item.opentelemetry_data.add_subscribe_span_event("ack start") for _ in range(total_chunks): ack_reqs_dict = { req.ack_id: req @@ -245,8 +247,8 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: for completed_ack in requests_completed: if completed_ack.opentelemetry_data: - completed_ack.opentelemetry_data.set_subscribe_span_result("acked") completed_ack.opentelemetry_data.add_subscribe_span_event("ack end") + completed_ack.opentelemetry_data.set_subscribe_span_result("acked") completed_ack.opentelemetry_data.end_subscribe_span() # Remove the completed messages from lease management. @@ -273,7 +275,7 @@ def _start_retry_thread(self, thread_name, thread_target): # a back-end timeout error or other permanent failure. retry_thread.start() - def _retry_acks(self, requests_to_retry): + def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): retry_delay_gen = exponential_sleep_generator( initial=_MIN_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS, maximum=_MAX_EXACTLY_ONCE_DELIVERY_ACK_MODACK_RETRY_DURATION_SECS, @@ -288,6 +290,9 @@ def _retry_acks(self, requests_to_retry): time.sleep(time_to_wait) ack_reqs_dict = {req.ack_id: req for req in requests_to_retry} + for req in requests_to_retry: + if req.opentelemetry_data: + req.opentelemetry_data.add_subscribe_span_event("ack start") requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=[req.ack_id for req in requests_to_retry], ack_reqs_dict=ack_reqs_dict, @@ -295,8 +300,8 @@ def _retry_acks(self, requests_to_retry): for completed_ack in requests_completed: if completed_ack.opentelemetry_data: - completed_ack.opentelemetry_data.set_subscribe_span_result("acked") completed_ack.opentelemetry_data.add_subscribe_span_event("ack end") + completed_ack.opentelemetry_data.set_subscribe_span_result("acked") completed_ack.opentelemetry_data.end_subscribe_span() assert ( diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 2c90109b8..82de8a591 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -269,7 +269,6 @@ def ack(self) -> None: """ if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("ack start") self.opentelemetry_data.add_process_span_event("ack called") self.opentelemetry_data.end_process_span() time_to_ack = math.ceil(time.time() - self._received_timestamp) @@ -324,7 +323,6 @@ def ack_with_response(self) -> "futures.Future": will be thrown. """ if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("ack start") self.opentelemetry_data.add_process_span_event("ack called") self.opentelemetry_data.end_process_span() req_future: Optional[futures.Future] diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 5626fa8c2..6116c5886 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -436,8 +436,9 @@ def test_opentelemetry_ack(span_exporter): assert "messaging.gcp_pubsub.result" in subscribe_span.attributes assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "ack end" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "ack start" + assert subscribe_span.events[1].name == "ack end" def test_ack(): @@ -591,8 +592,9 @@ def test_opentelemetry_retry_acks(span_exporter): assert "messaging.gcp_pubsub.result" in subscribe_span.attributes assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "ack end" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "ack start" + assert subscribe_span.events[1].name == "ack end" def test_retry_acks(): diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 7b4720ef6..099e08eb7 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -154,8 +154,7 @@ def test_opentelemetry_ack(span_exporter): process_span, subscribe_span = spans assert subscribe_span.name == "subscriptionID subscribe" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "ack start" + assert len(subscribe_span.events) == 0 assert process_span.name == "subscriptionID process" assert len(process_span.events) == 1 @@ -182,8 +181,7 @@ def test_opentelemetry_ack_with_response(span_exporter): process_span, subscribe_span = spans assert subscribe_span.name == "subscriptionID subscribe" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "ack start" + assert len(subscribe_span.events) == 0 assert process_span.name == "subscriptionID process" assert len(process_span.events) == 1 From 7d6ea08c06551380cd8973d74f17179995bc2c31 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 03:40:19 +0000 Subject: [PATCH 36/56] Move nack start event from message.nack and message.nack_with_response to dispatcher.modify_ack_deadline(deadline=0) when nack RPC is made --- google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py | 5 ++++- google/cloud/pubsub_v1/subscriber/message.py | 2 -- tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 5 +++-- tests/unit/pubsub_v1/subscriber/test_message.py | 6 ++---- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index d20bfa223..57dc593cb 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -353,7 +353,10 @@ def modify_ack_deadline( ack_ids_gen = (item.ack_id for item in items) deadline_seconds_gen = (item.seconds for item in items) total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE)) - + for item in items: + if item.opentelemetry_data: + if math.isclose(item.seconds, 0): + item.opentelemetry_data.add_subscribe_span_event("nack start") for _ in range(total_chunks): ack_reqs_dict = { req.ack_id: req diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 82de8a591..22c4e96e1 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -466,7 +466,6 @@ def nack(self) -> None: or another. """ if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("nack start") self.opentelemetry_data.add_process_span_event("nack called") self.opentelemetry_data.end_process_span() self._request_queue.put( @@ -514,7 +513,6 @@ def nack_with_response(self) -> "futures.Future": """ if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("nack start") self.opentelemetry_data.add_process_span_event("nack called") self.opentelemetry_data.end_process_span() req_future: Optional[futures.Future] diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 6116c5886..2a0e46906 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -863,8 +863,9 @@ def test_opentelemetry_nack(span_exporter): subscribe_span = spans[0] assert "messaging.gcp_pubsub.result" in subscribe_span.attributes assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "nack end" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "nack start" + assert subscribe_span.events[1].name == "nack end" def test_nack(): diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 099e08eb7..7af4e2d81 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -208,8 +208,7 @@ def test_opentelemetry_nack(span_exporter): process_span, subscribe_span = spans assert subscribe_span.name == "subscriptionID subscribe" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "nack start" + assert len(subscribe_span.events) == 0 assert process_span.name == "subscriptionID process" assert len(process_span.events) == 1 @@ -237,8 +236,7 @@ def test_opentelemetry_nack_with_response(span_exporter): process_span, subscribe_span = spans assert subscribe_span.name == "subscriptionID subscribe" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "nack start" + assert len(subscribe_span.events) == 0 assert process_span.name == "subscriptionID process" assert len(process_span.events) == 1 From e07fa4de3a3b21fb110facbebab2d1a8aea99520 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 03:46:28 +0000 Subject: [PATCH 37/56] Move modack start event from message.modify_ack_deadline/with response to dispatcher.modify_ack_deadline() --- google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py | 2 ++ google/cloud/pubsub_v1/subscriber/message.py | 4 ---- tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 5 +++-- tests/unit/pubsub_v1/subscriber/test_message.py | 6 ++---- 4 files changed, 7 insertions(+), 10 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 57dc593cb..0fc02ddeb 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -357,6 +357,8 @@ def modify_ack_deadline( if item.opentelemetry_data: if math.isclose(item.seconds, 0): item.opentelemetry_data.add_subscribe_span_event("nack start") + else: + item.opentelemetry_data.add_subscribe_span_event("modack start") for _ in range(total_chunks): ack_reqs_dict = { req.ack_id: req diff --git a/google/cloud/pubsub_v1/subscriber/message.py b/google/cloud/pubsub_v1/subscriber/message.py index 22c4e96e1..61f60c4d9 100644 --- a/google/cloud/pubsub_v1/subscriber/message.py +++ b/google/cloud/pubsub_v1/subscriber/message.py @@ -380,8 +380,6 @@ def modify_ack_deadline(self, seconds: int) -> None: between 0 and 600. Due to network latency, values below 10 are advised against. """ - if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("modack start") self._request_queue.put( requests.ModAckRequest( ack_id=self._ack_id, @@ -437,8 +435,6 @@ def modify_ack_deadline_with_response(self, seconds: int) -> "futures.Future": will be thrown. """ - if self.opentelemetry_data: - self.opentelemetry_data.add_subscribe_span_event("modack start") req_future: Optional[futures.Future] if self._exactly_once_delivery_enabled_func(): future = futures.Future() diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 2a0e46906..869df65bf 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -399,8 +399,9 @@ def test_opentelemetry_modify_ack_deadline(span_exporter): assert "messaging.gcp_pubsub.result" in subscribe_span.attributes assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "modacked" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "modack end" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "modack start" + assert subscribe_span.events[1].name == "modack end" def test_opentelemetry_ack(span_exporter): diff --git a/tests/unit/pubsub_v1/subscriber/test_message.py b/tests/unit/pubsub_v1/subscriber/test_message.py index 7af4e2d81..8d9d2566e 100644 --- a/tests/unit/pubsub_v1/subscriber/test_message.py +++ b/tests/unit/pubsub_v1/subscriber/test_message.py @@ -260,8 +260,7 @@ def test_opentelemetry_modack(span_exporter): spans = span_exporter.get_finished_spans() assert len(spans) == 1 - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "modack start" + assert len(spans[0].events) == 0 def test_opentelemetry_modack_with_response(span_exporter): @@ -281,8 +280,7 @@ def test_opentelemetry_modack_with_response(span_exporter): spans = span_exporter.get_finished_spans() assert len(spans) == 1 - assert len(spans[0].events) == 1 - assert spans[0].events[0].name == "modack start" + assert len(spans[0].events) == 0 def test_ack(): From c38446677d0fdbaaea1cc63cfe08ab8fcf6390f4 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 03:54:06 +0000 Subject: [PATCH 38/56] Rename the "dropped" event with "expired" --- google/cloud/pubsub_v1/subscriber/_protocol/leaser.py | 8 ++++---- tests/unit/pubsub_v1/subscriber/test_leaser.py | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index bc702f87e..5abdb7081 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -184,11 +184,11 @@ def maintain_leases(self) -> None: leased_message = leased_messages.get(drop_msg.ack_id) if leased_message and leased_message.opentelemetry_data: leased_message.opentelemetry_data.add_process_span_event( - "dropped" + "expired" ) leased_message.opentelemetry_data.end_process_span() leased_message.opentelemetry_data.set_subscribe_span_result( - "dropped" + "expired" ) leased_message.opentelemetry_data.end_subscribe_span() self._manager.dispatcher.drop(to_drop) @@ -232,9 +232,9 @@ def maintain_leases(self) -> None: for ack_id in expired_ack_ids: msg = leased_messages.get(ack_id) if msg and msg.opentelemetry_data: - msg.opentelemetry_data.add_process_span_event("dropped") + msg.opentelemetry_data.add_process_span_event("expired") msg.opentelemetry_data.end_process_span() - msg.opentelemetry_data.set_subscribe_span_result("dropped") + msg.opentelemetry_data.set_subscribe_span_result("expired") msg.opentelemetry_data.end_subscribe_span() self._manager.dispatcher.drop( [ diff --git a/tests/unit/pubsub_v1/subscriber/test_leaser.py b/tests/unit/pubsub_v1/subscriber/test_leaser.py index 275744bd9..b5b5cac20 100644 --- a/tests/unit/pubsub_v1/subscriber/test_leaser.py +++ b/tests/unit/pubsub_v1/subscriber/test_leaser.py @@ -186,7 +186,7 @@ def test_opentelemetry_dropped_message_process_span(span_exporter): assert subscribe_span.name == "subscriptionID subscribe" assert len(process_span.events) == 1 - assert process_span.events[0].name == "dropped" + assert process_span.events[0].name == "expired" assert process_span.parent == subscribe_span.context @@ -230,7 +230,7 @@ def test_opentelemetry_expired_message_exactly_once_process_span(span_exporter): assert subscribe_span.name == "subscriptionID subscribe" assert len(process_span.events) == 1 - assert process_span.events[0].name == "dropped" + assert process_span.events[0].name == "expired" assert process_span.parent == subscribe_span.context From 9ba47ffdd3f7bec3b2f007ecdea71b6d45d766e2 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Sun, 22 Sep 2024 21:45:07 +0000 Subject: [PATCH 39/56] Add modack span --- .../open_telemetry/subscribe_opentelemetry.py | 48 ++++++++++++++- .../pubsub_v1/subscriber/_protocol/leaser.py | 1 + .../_protocol/streaming_pull_manager.py | 36 ++++++++++- .../subscriber/test_streaming_pull_manager.py | 61 +++++++++++++++---- .../test_subscribe_opentelemetry.py | 13 ++++ 5 files changed, 145 insertions(+), 14 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 64d915468..bf97e71ea 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -12,7 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from typing import Optional +from typing import Optional, List from datetime import datetime from opentelemetry import trace, context @@ -56,10 +56,22 @@ def __init__(self, message: PubsubMessage): # proces span to add links to the publisher create span. self._publisher_create_span_context: Optional[context.Context] = None + # This will be set by `start_subscribe_span` method and will be used + # for other spans, such as modack span. + self._project_id: Optional[str] = None + @property - def subscription_id(self): + def subscription_id(self) -> Optional[str]: return self._subscription_id + @property + def project_id(self) -> Optional[str]: + return self._project_id + + @property + def subscribe_span(self) -> Optional[trace.Span]: + return self._subscribe_span + def start_subscribe_span( self, subscription: str, @@ -75,6 +87,7 @@ def start_subscribe_span( self._publisher_create_span_context = parent_span_context assert len(subscription.split("/")) == 4 subscription_short_name = subscription.split("/")[3] + self._project_id = subscription.split("/")[1] self._subscription_id = subscription_short_name with tracer.start_as_current_span( name=f"{subscription_short_name} subscribe", @@ -185,3 +198,34 @@ def add_process_span_event(self, event: str) -> None: "timestamp": str(datetime.now()), }, ) + + +def start_modack_span( + subscribe_span_links: List[trace.Link], + subscription_id: Optional[str], + message_count: int, + deadline: float, + project_id: Optional[str], + code_function: str, +) -> trace.Span: + _OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" + _OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" + assert subscription_id is not None + assert project_id is not None + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name=f"{subscription_id} modack", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.batch.message_count": message_count, + "messaging.gcp_pubsub.message.ack_deadline": deadline, + "messaging.destination.name": subscription_id, + "gcp.project_id": project_id, + "messaging.operation.name": "modack", + "code.function": code_function, + }, + links=subscribe_span_links, + kind=trace.SpanKind.CLIENT, + end_on_exit=False, + ) as modack_span: + return modack_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index 5abdb7081..eaac3a08e 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -219,6 +219,7 @@ def maintain_leases(self) -> None: for message in list(leased_messages.values()) if message.opentelemetry_data ] + expired_ack_ids = self._manager._send_lease_modacks( ack_id_gen, deadline, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index cc815f114..c7b3c0d3b 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -23,6 +23,7 @@ from typing import Any, Dict, Callable, Iterable, List, Optional, Set, Tuple import uuid +from opentelemetry import trace import grpc # type: ignore from google.api_core import bidi @@ -49,6 +50,9 @@ from google.rpc.error_details_pb2 import ErrorInfo # type: ignore from google.rpc import code_pb2 # type: ignore from google.rpc import status_pb2 +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + start_modack_span, +) if typing.TYPE_CHECKING: # pragma: NO COVER from google.cloud.pubsub_v1 import subscriber @@ -1023,6 +1027,31 @@ def _send_lease_modacks( warn_on_invalid=True, ) -> Set[str]: exactly_once_enabled = False + + modack_span: Optional[trace.Span] = None + if self._client.open_telemetry_enabled: + subscribe_span_links: List[trace.Link] = [] + assert len(self._subscription.split("/")) == 4 + subscription_id: str = self._subscription.split("/")[3] + project_id: str = self._subscription.split("/")[1] + for data in opentelemetry_data: + subscribe_span: Optional[trace.Span] = data.subscribe_span + if ( + subscribe_span + and subscribe_span.get_span_context().trace_flags.sampled + ): + subscribe_span_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + modack_span = start_modack_span( + subscribe_span_links, + subscription_id, + len(opentelemetry_data), + ack_deadline, + project_id, + "_send_lease_modacks", + ) + with self._exactly_once_enabled_lock: exactly_once_enabled = self._exactly_once_enabled if exactly_once_enabled: @@ -1040,7 +1069,10 @@ def _send_lease_modacks( assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) - + if ( + modack_span + ): # pragma: NO COVER # Identical code covered in the same function below + modack_span.end() expired_ack_ids = set() for req in items: try: @@ -1070,6 +1102,8 @@ def _send_lease_modacks( item._replace(opentelemetry_data=data) assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) + if modack_span: + modack_span.end() return set() def _exactly_once_delivery_enabled(self) -> bool: diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 8bbf4b860..bc7e8bbd9 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -20,6 +20,7 @@ import types as stdlib_types import datetime import queue +import math from opentelemetry import trace from google.protobuf import timestamp_pb2 @@ -632,7 +633,10 @@ def test__maybe_release_messages_negative_on_hold_bytes_warning(caplog): def test_opentelemetry__send_lease_modacks(span_exporter): - manager, _, _, _, _, _ = make_running_manager(enable_open_telemetry=True) + manager, _, _, _, _, _ = make_running_manager( + enable_open_telemetry=True, + subscription_name="projects/projectID/subscriptions/subscriptionID", + ) data1 = SubscribeOpenTelemetry( message=gapic_types.PubsubMessage(data=b"foo", message_id="1") ) @@ -652,19 +656,42 @@ def test_opentelemetry__send_lease_modacks(span_exporter): ack_id="ack_id1", delivery_attempt=2, ) - manager._send_lease_modacks( - ack_ids=["ack_id1", "ack_id2"], - ack_deadline=20, - opentelemetry_data=[data1, data2], - ) + mock_span_context = mock.Mock(spec=trace.SpanContext) + mock_span_context.trace_flags.sampled = False + with mock.patch.object( + data1._subscribe_span, "get_span_context", return_value=mock_span_context + ): + manager._send_lease_modacks( + ack_ids=["ack_id1", "ack_id2"], + ack_deadline=20, + opentelemetry_data=[data1, data2], + ) data1.end_subscribe_span() data2.end_subscribe_span() spans = span_exporter.get_finished_spans() - assert len(spans) == 2 + assert len(spans) == 3 + modack_span, subscribe_span1, subscribe_span2 = spans + + assert len(subscribe_span1.events) == 1 + assert subscribe_span1.events[0].name == "modack start" - for span in spans: - assert len(span.events) == 1 - assert span.events[0].name == "modack start" + assert len(subscribe_span2.events) == 1 + assert subscribe_span2.events[0].name == "modack start" + + assert modack_span.name == "subscriptionID modack" + assert modack_span.parent is None + assert modack_span.kind == trace.SpanKind.CLIENT + assert len(modack_span.links) == 1 + modack_span_attributes = modack_span.attributes + assert modack_span_attributes["messaging.system"] == "gcp_pubsub" + assert modack_span_attributes["messaging.batch.message_count"] == 2 + assert math.isclose( + modack_span_attributes["messaging.gcp_pubsub.message.ack_deadline"], 20 + ) + assert modack_span_attributes["messaging.destination.name"] == "subscriptionID" + assert modack_span_attributes["gcp.project_id"] == "projectID" + assert modack_span_attributes["messaging.operation.name"] == "modack" + assert modack_span_attributes["code.function"] == "_send_lease_modacks" def test_send_unary_ack(): @@ -2754,7 +2781,19 @@ def test_opentelemetry__on_response_subscribe_span_create(span_exporter): # Subscribe span is still active, hence unexported. # Subscriber scheduler spans corresponding to the two messages would be started in `messages_on_hold.put()`` # and ended in `_maybe_release_messages` - assert len(spans) == 2 + assert len(spans) == 3 + modack_span = spans[0] + + for span in spans[1:]: + assert span.name == "subscriber scheduler" + assert span.kind == trace.SpanKind.INTERNAL + assert span.parent is not None + assert len(span.attributes) == 0 + + assert modack_span.name == "subscriptionID modack" + assert modack_span.kind == trace.SpanKind.CLIENT + assert modack_span.parent is None + assert len(modack_span.links) == 2 RECEIVED = datetime.datetime(2012, 4, 21, 15, 0, tzinfo=datetime.timezone.utc) diff --git a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py index bc079bac2..2fb89aa7c 100644 --- a/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py +++ b/tests/unit/pubsub_v1/subscriber/test_subscribe_opentelemetry.py @@ -187,3 +187,16 @@ def test_opentelemetry_start_process_span_no_publisher_span(): # Assert that when there is no publisher create span context propagated, # There are no links created in the process span. assert len(opentelemetry_data._process_span.links) == 0 + + +def test_opentelemetry_project_id_set_after_create_subscribe_span(): + msg = create_message(b"foo") + opentelemetry_data = SubscribeOpenTelemetry(msg) + msg.opentelemetry_data = opentelemetry_data + opentelemetry_data.start_subscribe_span( + subscription="projects/projectId/subscriptions/subscriptionID", + exactly_once_enabled=False, + ack_id="ack_id", + delivery_attempt=4, + ) + assert opentelemetry_data.project_id == "projectId" From 2410a4edd08ac131a1c3c83f7bb8259e8099bcf3 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 23 Sep 2024 00:08:49 +0000 Subject: [PATCH 40/56] Add is_receipt_modack attribute to modack span --- .../open_telemetry/subscribe_opentelemetry.py | 2 ++ .../cloud/pubsub_v1/subscriber/_protocol/leaser.py | 1 - .../subscriber/_protocol/streaming_pull_manager.py | 3 +++ .../subscriber/test_streaming_pull_manager.py | 14 +++++++++++++- 4 files changed, 18 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index bf97e71ea..0189c6fbc 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -207,6 +207,7 @@ def start_modack_span( deadline: float, project_id: Optional[str], code_function: str, + receipt_modack: bool, ) -> trace.Span: _OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" _OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" @@ -223,6 +224,7 @@ def start_modack_span( "gcp.project_id": project_id, "messaging.operation.name": "modack", "code.function": code_function, + "messaging.gcp_pubsub.is_receipt_modack": receipt_modack, }, links=subscribe_span_links, kind=trace.SpanKind.CLIENT, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py index eaac3a08e..5abdb7081 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/leaser.py @@ -219,7 +219,6 @@ def maintain_leases(self) -> None: for message in list(leased_messages.values()) if message.opentelemetry_data ] - expired_ack_ids = self._manager._send_lease_modacks( ack_id_gen, deadline, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index c7b3c0d3b..ffcc2c899 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1025,6 +1025,7 @@ def _send_lease_modacks( ack_deadline: float, opentelemetry_data: List[SubscribeOpenTelemetry], warn_on_invalid=True, + receipt_modack: bool = False, ) -> Set[str]: exactly_once_enabled = False @@ -1050,6 +1051,7 @@ def _send_lease_modacks( ack_deadline, project_id, "_send_lease_modacks", + receipt_modack, ) with self._exactly_once_enabled_lock: @@ -1175,6 +1177,7 @@ def _on_response(self, response: gapic_types.StreamingPullResponse) -> None: self.ack_deadline, subscribe_opentelemetry, warn_on_invalid=False, + receipt_modack=True, ) with self._pause_resume_lock: diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index bc7e8bbd9..fd00d1dfa 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -632,7 +632,14 @@ def test__maybe_release_messages_negative_on_hold_bytes_warning(caplog): assert manager._on_hold_bytes == 0 # should be auto-corrected -def test_opentelemetry__send_lease_modacks(span_exporter): +@pytest.mark.parametrize( + "receipt_modack", + [ + True, + False, + ], +) +def test_opentelemetry__send_lease_modacks(span_exporter, receipt_modack): manager, _, _, _, _, _ = make_running_manager( enable_open_telemetry=True, subscription_name="projects/projectID/subscriptions/subscriptionID", @@ -665,6 +672,7 @@ def test_opentelemetry__send_lease_modacks(span_exporter): ack_ids=["ack_id1", "ack_id2"], ack_deadline=20, opentelemetry_data=[data1, data2], + receipt_modack=receipt_modack, ) data1.end_subscribe_span() data2.end_subscribe_span() @@ -692,6 +700,10 @@ def test_opentelemetry__send_lease_modacks(span_exporter): assert modack_span_attributes["gcp.project_id"] == "projectID" assert modack_span_attributes["messaging.operation.name"] == "modack" assert modack_span_attributes["code.function"] == "_send_lease_modacks" + assert ( + modack_span_attributes["messaging.gcp_pubsub.is_receipt_modack"] + == receipt_modack + ) def test_send_unary_ack(): From b66fd19e5cf7e43e75c076bce7e25dce98878735 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 23 Sep 2024 02:04:14 +0000 Subject: [PATCH 41/56] Add ack span --- .../open_telemetry/subscribe_opentelemetry.py | 44 ++++++++++---- .../subscriber/_protocol/dispatcher.py | 34 +++++++++++ .../pubsub_v1/subscriber/test_dispatcher.py | 59 +++++++++++++++---- 3 files changed, 113 insertions(+), 24 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 0189c6fbc..aae1ea9a4 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -24,11 +24,11 @@ ) from google.pubsub_v1.types import PubsubMessage +_OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" +_OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" -class SubscribeOpenTelemetry: - _OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" - _OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" +class SubscribeOpenTelemetry: def __init__(self, message: PubsubMessage): self._message: PubsubMessage = message @@ -79,7 +79,7 @@ def start_subscribe_span( ack_id: str, delivery_attempt: int, ) -> None: - tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) parent_span_context = TraceContextTextMapPropagator().extract( carrier=self._message, getter=OpenTelemetryContextGetter(), @@ -94,7 +94,7 @@ def start_subscribe_span( context=parent_span_context if parent_span_context else None, kind=trace.SpanKind.CONSUMER, attributes={ - "messaging.system": self._OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, "messaging.destination.name": subscription_short_name, "gcp.project_id": subscription.split("/")[1], "messaging.message.id": self._message.message_id, @@ -131,7 +131,7 @@ def set_subscribe_span_result(self, result: str) -> None: def start_subscribe_concurrency_control_span(self) -> None: assert self._subscribe_span is not None - tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) with tracer.start_as_current_span( name="subscriber concurrency control", kind=trace.SpanKind.INTERNAL, @@ -146,7 +146,7 @@ def end_subscribe_concurrency_control_span(self) -> None: def start_subscribe_scheduler_span(self) -> None: assert self._subscribe_span is not None - tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) with tracer.start_as_current_span( name="subscriber scheduler", kind=trace.SpanKind.INTERNAL, @@ -161,7 +161,7 @@ def end_subscribe_scheduler_span(self) -> None: def start_process_span(self) -> None: assert self._subscribe_span is not None - tracer = trace.get_tracer(self._OPEN_TELEMETRY_TRACER_NAME) + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) publish_create_span_link: Optional[trace.Link] = None if self._publisher_create_span_context: publish_create_span: trace.Span = trace.get_current_span( @@ -177,7 +177,7 @@ def start_process_span(self) -> None: with tracer.start_as_current_span( name=f"{self._subscription_id} process", attributes={ - "messaging.system": self._OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, }, kind=trace.SpanKind.INTERNAL, context=set_span_in_context(self._subscribe_span), @@ -209,8 +209,6 @@ def start_modack_span( code_function: str, receipt_modack: bool, ) -> trace.Span: - _OPEN_TELEMETRY_TRACER_NAME: str = "google.cloud.pubsub_v1" - _OPEN_TELEMETRY_MESSAGING_SYSTEM: str = "gcp_pubsub" assert subscription_id is not None assert project_id is not None tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) @@ -231,3 +229,27 @@ def start_modack_span( end_on_exit=False, ) as modack_span: return modack_span + + +def start_ack_span( + subscription_id: str, + message_count: int, + project_id: str, + links: List[trace.Link], +) -> trace.Span: + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name=f"{subscription_id} ack", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.batch.message_count": message_count, + "messaging.operation": "ack", + "gcp.project_id": project_id, + "messaging.destination.name": subscription_id, + "code.function": "ack", + }, + kind=trace.SpanKind.CLIENT, + links=links, + end_on_exit=False, + ) as ack_span: + return ack_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 0fc02ddeb..6accf37d0 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -26,11 +26,14 @@ import warnings from google.api_core.retry import exponential_sleep_generator +from opentelemetry import trace + from google.cloud.pubsub_v1.subscriber._protocol import helper_threads from google.cloud.pubsub_v1.subscriber._protocol import requests from google.cloud.pubsub_v1.subscriber.exceptions import ( AcknowledgeStatus, ) +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import start_ack_span if typing.TYPE_CHECKING: # pragma: NO COVER import queue @@ -232,18 +235,49 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: items_gen = iter(items) ack_ids_gen = (item.ack_id for item in items) total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE)) + subscription_id: Optional[str] = None + project_id: Optional[str] = None for item in items: if item.opentelemetry_data: item.opentelemetry_data.add_subscribe_span_event("ack start") + if subscription_id is None: + subscription_id = item.opentelemetry_data.subscription_id + if project_id is None: + project_id = item.opentelemetry_data.project_id + for _ in range(total_chunks): ack_reqs_dict = { req.ack_id: req for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } + + subscribe_links: List[trace.Link] = [] + for ack_req in ack_reqs_dict.values(): + if ack_req.opentelemetry_data: + subscribe_span: Optional[ + trace.Span + ] = ack_req.opentelemetry_data.subscribe_span + if ( + subscribe_span + and subscribe_span.get_span_context().trace_flags.sampled + ): + subscribe_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + ack_span: Optional[trace.Span] = None + if subscription_id and project_id: + ack_span = start_ack_span( + subscription_id, + len(ack_reqs_dict), + project_id, + subscribe_links, + ) requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=list(itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE)), ack_reqs_dict=ack_reqs_dict, ) + if ack_span: + ack_span.end() for completed_ack in requests_completed: if completed_ack.opentelemetry_data: diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 869df65bf..5ec72b2df 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -17,6 +17,8 @@ import sys import threading +from opentelemetry import trace + from google.cloud.pubsub_v1.subscriber._protocol import dispatcher from google.cloud.pubsub_v1.subscriber._protocol import helper_threads from google.cloud.pubsub_v1.subscriber._protocol import requests @@ -410,8 +412,15 @@ def test_opentelemetry_ack(span_exporter): ) dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) - opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) - opentelemetry_data.start_subscribe_span( + data1 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data1.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + data2 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data2.start_subscribe_span( subscription="projects/projectID/subscriptions/subscriptionID", exactly_once_enabled=True, ack_id="ack_id", @@ -424,22 +433,46 @@ def test_opentelemetry_ack(span_exporter): time_to_ack=20, ordering_key="", future=None, - opentelemetry_data=opentelemetry_data, - ) + opentelemetry_data=data1, + ), + requests.AckRequest( + ack_id="ack_id_string2", + byte_size=0, + time_to_ack=20, + ordering_key="", + future=None, + opentelemetry_data=data2, + ), ] manager.send_unary_ack.return_value = (items, []) - dispatcher_.ack(items) + mock_span_context = mock.Mock(spec=trace.SpanContext) + mock_span_context.trace_flags.sampled = False + with mock.patch.object( + data2._subscribe_span, "get_span_context", return_value=mock_span_context + ): + dispatcher_.ack(items) spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - subscribe_span = spans[0] - - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" - assert len(subscribe_span.events) == 2 - assert subscribe_span.events[0].name == "ack start" - assert subscribe_span.events[1].name == "ack end" + assert len(spans) == 3 + ack_span = spans[0] + + for subscribe_span in spans[1:]: + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "ack start" + assert subscribe_span.events[1].name == "ack end" + + assert ack_span.name == "subscriptionID ack" + assert ack_span.kind == trace.SpanKind.CLIENT + assert ack_span.parent is None + assert len(ack_span.links) == 1 + assert ack_span.attributes["messaging.system"] == "gcp_pubsub" + assert ack_span.attributes["messaging.batch.message_count"] == 2 + assert ack_span.attributes["messaging.operation"] == "ack" + assert ack_span.attributes["gcp.project_id"] == "projectID" + assert ack_span.attributes["messaging.destination.name"] == "subscriptionID" + assert ack_span.attributes["code.function"] == "ack" def test_ack(): From 54a88baf8fcb1e7226b34b129566cba3eae8699f Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 23 Sep 2024 16:22:51 +0000 Subject: [PATCH 42/56] Add ack span for dispatcher._retry_ack --- .../subscriber/_protocol/dispatcher.py | 29 ++++++++++ .../pubsub_v1/subscriber/test_dispatcher.py | 56 +++++++++++++++---- 2 files changed, 73 insertions(+), 12 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 6accf37d0..949e757d6 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -324,14 +324,43 @@ def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): time.sleep(time_to_wait) ack_reqs_dict = {req.ack_id: req for req in requests_to_retry} + subscription_id: Optional[str] = None + project_id: Optional[str] = None + subscribe_links: List[trace.Link] = [] for req in requests_to_retry: if req.opentelemetry_data: req.opentelemetry_data.add_subscribe_span_event("ack start") + if subscription_id is None: + subscription_id = req.opentelemetry_data.subscription_id + if project_id is None: + project_id = req.opentelemetry_data.project_id + subscribe_span: Optional[ + trace.Span + ] = req.opentelemetry_data.subscribe_span + if ( + subscribe_span + and subscribe_span.get_span_context().trace_flags.sampled + ): + subscribe_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + ack_span: Optional[trace.Span] = None + if subscription_id and project_id: + ack_span = start_ack_span( + subscription_id, + len(ack_reqs_dict), + project_id, + subscribe_links, + ) + requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=[req.ack_id for req in requests_to_retry], ack_reqs_dict=ack_reqs_dict, ) + if ack_span: + ack_span.end() + for completed_ack in requests_completed: if completed_ack.opentelemetry_data: completed_ack.opentelemetry_data.add_subscribe_span_event("ack end") diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 5ec72b2df..fd1f916ea 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -596,8 +596,15 @@ def test_opentelemetry_retry_acks(span_exporter): streaming_pull_manager.StreamingPullManager, instance=True ) dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) - opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) - opentelemetry_data.start_subscribe_span( + data1 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data1.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id", + delivery_attempt=5, + ) + data2 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data2.start_subscribe_span( subscription="projects/projectID/subscriptions/subscriptionID", exactly_once_enabled=True, ack_id="ack_id", @@ -612,23 +619,48 @@ def test_opentelemetry_retry_acks(span_exporter): time_to_ack=20, ordering_key="", future=f, - opentelemetry_data=opentelemetry_data, - ) + opentelemetry_data=data1, + ), + requests.AckRequest( + ack_id="ack_id_string2", + byte_size=0, + time_to_ack=20, + ordering_key="", + future=f, + opentelemetry_data=data2, + ), ] manager.send_unary_ack.side_effect = [(items, [])] + mock_span_context = mock.Mock(spec=trace.SpanContext) + mock_span_context.trace_flags.sampled = False with mock.patch("time.sleep", return_value=None): - dispatcher_._retry_acks(items) + with mock.patch.object( + data2._subscribe_span, "get_span_context", return_value=mock_span_context + ): + dispatcher_._retry_acks(items) spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - subscribe_span = spans[0] + assert len(spans) == 3 + ack_span = spans[0] - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" - assert len(subscribe_span.events) == 2 - assert subscribe_span.events[0].name == "ack start" - assert subscribe_span.events[1].name == "ack end" + for subscribe_span in spans[1:]: + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "acked" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "ack start" + assert subscribe_span.events[1].name == "ack end" + + assert ack_span.name == "subscriptionID ack" + assert ack_span.kind == trace.SpanKind.CLIENT + assert ack_span.parent is None + assert len(ack_span.links) == 1 + assert ack_span.attributes["messaging.system"] == "gcp_pubsub" + assert ack_span.attributes["messaging.batch.message_count"] == 2 + assert ack_span.attributes["messaging.operation"] == "ack" + assert ack_span.attributes["gcp.project_id"] == "projectID" + assert ack_span.attributes["messaging.destination.name"] == "subscriptionID" + assert ack_span.attributes["code.function"] == "ack" def test_retry_acks(): From 28a192ae6a9b43070952b06adc481579b9a4ce23 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 23 Sep 2024 18:40:57 +0000 Subject: [PATCH 43/56] Add nack span --- .../open_telemetry/subscribe_opentelemetry.py | 24 +++++++ .../subscriber/_protocol/dispatcher.py | 36 +++++++++- .../pubsub_v1/subscriber/test_dispatcher.py | 70 ++++++++++++++----- 3 files changed, 113 insertions(+), 17 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index aae1ea9a4..18d8d0a5b 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -253,3 +253,27 @@ def start_ack_span( end_on_exit=False, ) as ack_span: return ack_span + + +def start_nack_span( + subscription_id: str, + message_count: int, + project_id: str, + links: List[trace.Link], +) -> trace.Span: + tracer = trace.get_tracer(_OPEN_TELEMETRY_TRACER_NAME) + with tracer.start_as_current_span( + name=f"{subscription_id} nack", + attributes={ + "messaging.system": _OPEN_TELEMETRY_MESSAGING_SYSTEM, + "messaging.batch.message_count": message_count, + "messaging.operation": "nack", + "gcp.project_id": project_id, + "messaging.destination.name": subscription_id, + "code.function": "modify_ack_deadline", + }, + kind=trace.SpanKind.CLIENT, + links=links, + end_on_exit=False, + ) as nack_span: + return nack_span diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 949e757d6..091915380 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -33,7 +33,10 @@ from google.cloud.pubsub_v1.subscriber.exceptions import ( AcknowledgeStatus, ) -from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import start_ack_span +from google.cloud.pubsub_v1.open_telemetry.subscribe_opentelemetry import ( + start_ack_span, + start_nack_span, +) if typing.TYPE_CHECKING: # pragma: NO COVER import queue @@ -416,10 +419,18 @@ def modify_ack_deadline( ack_ids_gen = (item.ack_id for item in items) deadline_seconds_gen = (item.seconds for item in items) total_chunks = int(math.ceil(len(items) / _ACK_IDS_BATCH_SIZE)) + + subscription_id: Optional[str] = None + project_id: Optional[str] = None + for item in items: if item.opentelemetry_data: if math.isclose(item.seconds, 0): item.opentelemetry_data.add_subscribe_span_event("nack start") + if subscription_id is None: + subscription_id = item.opentelemetry_data.subscription_id + if project_id is None: + project_id = item.opentelemetry_data.project_id else: item.opentelemetry_data.add_subscribe_span_event("modack start") for _ in range(total_chunks): @@ -427,6 +438,27 @@ def modify_ack_deadline( req.ack_id: req for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } + subscribe_links: List[trace.Link] = [] + for ack_req in ack_reqs_dict.values(): + if ack_req.opentelemetry_data and math.isclose(ack_req.seconds, 0): + subscribe_span: Optional[ + trace.Span + ] = ack_req.opentelemetry_data.subscribe_span + if ( + subscribe_span + and subscribe_span.get_span_context().trace_flags.sampled + ): + subscribe_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + nack_span: Optional[trace.Span] = None + if subscription_id and project_id and len(subscribe_links) > 0: + nack_span = start_nack_span( + subscription_id, + len(ack_reqs_dict), + project_id, + subscribe_links, + ) requests_to_retry: List[requests.ModAckRequest] requests_completed: Optional[List[requests.ModAckRequest]] = None if default_deadline is None: @@ -450,6 +482,8 @@ def modify_ack_deadline( ack_reqs_dict=ack_reqs_dict, default_deadline=default_deadline, ) + if nack_span: + nack_span.end() assert ( len(requests_to_retry) <= _ACK_IDS_BATCH_SIZE ), "Too many requests to be retried." diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index fd1f916ea..812ad0fb2 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -895,43 +895,81 @@ def test_opentelemetry_nack(span_exporter): ) dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) - opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) - opentelemetry_data.start_subscribe_span( + data1 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data1.start_subscribe_span( subscription="projects/projectID/subscriptions/subscriptionID", exactly_once_enabled=True, ack_id="ack_id", delivery_attempt=5, ) + data2 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data2.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id2", + delivery_attempt=5, + ) items = [ requests.NackRequest( - ack_id="ack_id_string", + ack_id="ack_id", byte_size=10, ordering_key="", future=None, - opentelemetry_data=opentelemetry_data, - ) + opentelemetry_data=data1, + ), + requests.NackRequest( + ack_id="ack_id2", + byte_size=10, + ordering_key="", + future=None, + opentelemetry_data=data2, + ), ] response_items = [ requests.ModAckRequest( - ack_id="ack_id_string", + ack_id="ack_id", seconds=0, future=None, - opentelemetry_data=opentelemetry_data, - ) + opentelemetry_data=data1, + ), + requests.ModAckRequest( + ack_id="ack_id2", + seconds=0, + future=None, + opentelemetry_data=data2, + ), ] manager.send_unary_modack.return_value = (response_items, []) - dispatcher_.nack(items) + + mock_span_context = mock.Mock(spec=trace.SpanContext) + mock_span_context.trace_flags.sampled = False + with mock.patch.object( + data2._subscribe_span, "get_span_context", return_value=mock_span_context + ): + dispatcher_.nack(items) spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - subscribe_span = spans[0] - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" - assert len(subscribe_span.events) == 2 - assert subscribe_span.events[0].name == "nack start" - assert subscribe_span.events[1].name == "nack end" + assert len(spans) == 3 + nack_span = spans[0] + for subscribe_span in spans[1:]: + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" + assert len(subscribe_span.events) == 2 + assert subscribe_span.events[0].name == "nack start" + assert subscribe_span.events[1].name == "nack end" + + assert nack_span.name == "subscriptionID nack" + assert nack_span.kind == trace.SpanKind.CLIENT + assert nack_span.parent is None + assert len(nack_span.links) == 1 + assert nack_span.attributes["messaging.system"] == "gcp_pubsub" + assert nack_span.attributes["messaging.batch.message_count"] == 2 + assert nack_span.attributes["messaging.operation"] == "nack" + assert nack_span.attributes["gcp.project_id"] == "projectID" + assert nack_span.attributes["messaging.destination.name"] == "subscriptionID" + assert nack_span.attributes["code.function"] == "modify_ack_deadline" def test_nack(): From c8c5eb9bae16a52a782832e197e9370cbf6cb310 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Mon, 23 Sep 2024 21:41:46 +0000 Subject: [PATCH 44/56] Add nack span for dispatcher._retry_modacks --- .../subscriber/_protocol/dispatcher.py | 30 ++++++++++ .../pubsub_v1/subscriber/test_dispatcher.py | 56 ++++++++++++++----- 2 files changed, 73 insertions(+), 13 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 091915380..2e1eef17f 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -530,11 +530,41 @@ def _retry_modacks(self, requests_to_retry): time.sleep(time_to_wait) ack_reqs_dict = {req.ack_id: req for req in requests_to_retry} + + subscription_id: Optional[str] = None + project_id: Optional[str] = None + subscribe_links: List[trace.Link] = [] + for ack_req in ack_reqs_dict.values(): + if ack_req.opentelemetry_data and math.isclose(ack_req.seconds, 0): + if subscription_id is None: + subscription_id = ack_req.opentelemetry_data.subscription_id + if project_id is None: + project_id = ack_req.opentelemetry_data.project_id + subscribe_span: Optional[ + trace.Span + ] = ack_req.opentelemetry_data.subscribe_span + if ( + subscribe_span + and subscribe_span.get_span_context().trace_flags.sampled + ): + subscribe_links.append( + trace.Link(subscribe_span.get_span_context()) + ) + nack_span: Optional[trace.Span] = None + if subscription_id and project_id and len(subscribe_links) > 0: + nack_span = start_nack_span( + subscription_id, + len(ack_reqs_dict), + project_id, + subscribe_links, + ) requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=[req.ack_id for req in requests_to_retry], modify_deadline_seconds=[req.seconds for req in requests_to_retry], ack_reqs_dict=ack_reqs_dict, ) + if nack_span: + nack_span.end() for completed_modack in requests_completed: if completed_modack.opentelemetry_data: # nack is a modack with 0 extension seconds. diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 812ad0fb2..79b6d3ac9 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -769,35 +769,65 @@ def test_opentelemetry_retry_nacks(span_exporter): ) dispatcher_ = dispatcher.Dispatcher(manager, mock.sentinel.queue) - opentelemetry_data = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) - opentelemetry_data.start_subscribe_span( + data1 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data1.start_subscribe_span( subscription="projects/projectID/subscriptions/subscriptionID", exactly_once_enabled=True, - ack_id="ack_id", + ack_id="ack_id1", + delivery_attempt=5, + ) + data2 = SubscribeOpenTelemetry(message=PubsubMessage(data=b"foo")) + data2.start_subscribe_span( + subscription="projects/projectID/subscriptions/subscriptionID", + exactly_once_enabled=True, + ack_id="ack_id2", delivery_attempt=5, ) f = futures.Future() items = [ requests.ModAckRequest( - ack_id="ack_id_string", + ack_id="ack_id1", seconds=0, future=f, - opentelemetry_data=opentelemetry_data, - ) + opentelemetry_data=data1, + ), + requests.ModAckRequest( + ack_id="ack_id2", + seconds=0, + future=f, + opentelemetry_data=data2, + ), ] manager.send_unary_modack.side_effect = [(items, [])] + mock_span_context = mock.Mock(spec=trace.SpanContext) + mock_span_context.trace_flags.sampled = False with mock.patch("time.sleep", return_value=None): - dispatcher_._retry_modacks(items) + with mock.patch.object( + data2._subscribe_span, "get_span_context", return_value=mock_span_context + ): + dispatcher_._retry_modacks(items) spans = span_exporter.get_finished_spans() - assert len(spans) == 1 - subscribe_span = spans[0] + assert len(spans) == 3 + nack_span = spans[0] - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" - assert len(subscribe_span.events) == 1 - assert subscribe_span.events[0].name == "nack end" + for subscribe_span in spans[1:]: + assert "messaging.gcp_pubsub.result" in subscribe_span.attributes + assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "nacked" + assert len(subscribe_span.events) == 1 + assert subscribe_span.events[0].name == "nack end" + + assert nack_span.name == "subscriptionID nack" + assert nack_span.kind == trace.SpanKind.CLIENT + assert nack_span.parent is None + assert len(nack_span.links) == 1 + assert nack_span.attributes["messaging.system"] == "gcp_pubsub" + assert nack_span.attributes["messaging.batch.message_count"] == 2 + assert nack_span.attributes["messaging.operation"] == "nack" + assert nack_span.attributes["gcp.project_id"] == "projectID" + assert nack_span.attributes["messaging.destination.name"] == "subscriptionID" + assert nack_span.attributes["code.function"] == "modify_ack_deadline" def test_retry_modacks(): From 6ec7c3560d3cb013c858a754b89f615eda7570c2 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 05:30:16 +0000 Subject: [PATCH 45/56] Fix modacks * Currently, subscribe_span is ended after modack end event is added to the subscribe_span. This should not happen, since subscribe_span should be ended only after ack,nack or drop, but not modack * Remove an extraneous modack start that was present in streaming_pull_manager.send_lease_modacks(). This is not required, since modack start and end events will anyways be added to the subscribe span in the dispatcher.modify_ack_deadline() method. * Replace the NamedTuple._replace() methods with creation of new NamedTuple ModackRequest items. This is because NamedTuple._replace() returns a new NamedTuple, rather than replacing the existing one. This results in the opentelemetry data not being plumbed down into the dispatcher.modify_ack_deadline_method. --- .../subscriber/_protocol/dispatcher.py | 10 +--- .../_protocol/streaming_pull_manager.py | 46 +++++++++++++------ .../pubsub_v1/subscriber/test_dispatcher.py | 10 ++-- .../subscriber/test_streaming_pull_manager.py | 7 +-- 4 files changed, 41 insertions(+), 32 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 2e1eef17f..861fcb5e5 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -498,14 +498,11 @@ def modify_ack_deadline( completed_modack.opentelemetry_data.add_subscribe_span_event( "nack end" ) + completed_modack.opentelemetry_data.end_subscribe_span() else: - completed_modack.opentelemetry_data.set_subscribe_span_result( - "modacked" - ) completed_modack.opentelemetry_data.add_subscribe_span_event( "modack end" ) - completed_modack.opentelemetry_data.end_subscribe_span() # Retry on a separate thread so the dispatcher thread isn't blocked # by sleeps. @@ -575,14 +572,11 @@ def _retry_modacks(self, requests_to_retry): completed_modack.opentelemetry_data.add_subscribe_span_event( "nack end" ) + completed_modack.opentelemetry_data.end_subscribe_span() else: - completed_modack.opentelemetry_data.set_subscribe_span_result( - "modacked" - ) completed_modack.opentelemetry_data.add_subscribe_span_event( "modack end" ) - completed_modack.opentelemetry_data.end_subscribe_span() def nack(self, items: Sequence[requests.NackRequest]) -> None: """Explicitly deny receipt of messages. diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index ffcc2c899..eb6a7a07d 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1057,17 +1057,25 @@ def _send_lease_modacks( with self._exactly_once_enabled_lock: exactly_once_enabled = self._exactly_once_enabled if exactly_once_enabled: - items = [ - requests.ModAckRequest(ack_id, ack_deadline, futures.Future()) - for ack_id in ack_ids - ] + items: List[requests.ModAckRequest] = [] if self._client.open_telemetry_enabled: - for item, data in zip( - items, opentelemetry_data + for ack_id, data in zip( + ack_ids, opentelemetry_data ): # pragma: NO COVER # Identical code covered in the same function below assert data is not None - data.add_subscribe_span_event("modack start") - item._replace(opentelemetry_data=data) + items.append( + requests.ModAckRequest( + ack_id, + ack_deadline, + futures.Future(), + data, + ) + ) + else: + items = [ + requests.ModAckRequest(ack_id, ack_deadline, futures.Future()) + for ack_id in ack_ids + ] assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) @@ -1093,15 +1101,23 @@ def _send_lease_modacks( expired_ack_ids.add(req.ack_id) return expired_ack_ids else: - items = [ - requests.ModAckRequest(ack_id, self.ack_deadline, None) - for ack_id in ack_ids - ] + items: List[requests.ModAckRequest] = [] if self._client.open_telemetry_enabled: - for item, data in zip(items, opentelemetry_data): + for ack_id, data in zip(ack_ids, opentelemetry_data): assert data is not None - data.add_subscribe_span_event("modack start") - item._replace(opentelemetry_data=data) + items.append( + requests.ModAckRequest( + ack_id, + self.ack_deadline, + None, + data, + ) + ) + else: + items = [ + requests.ModAckRequest(ack_id, self.ack_deadline, None) + for ack_id in ack_ids + ] assert self._dispatcher is not None self._dispatcher.modify_ack_deadline(items, ack_deadline) if modack_span: diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 79b6d3ac9..bf557ea2f 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -395,12 +395,13 @@ def test_opentelemetry_modify_ack_deadline(span_exporter): manager.send_unary_modack.return_value = (items, []) dispatcher_.modify_ack_deadline(items) + # Subscribe span would not have ended as part of a modack. So, end it + # in the test, so that we can export and assert its contents. + opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() assert len(spans) == 1 subscribe_span = spans[0] - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "modacked" assert len(subscribe_span.events) == 2 assert subscribe_span.events[0].name == "modack start" assert subscribe_span.events[1].name == "modack end" @@ -753,12 +754,13 @@ def test_opentelemetry_retry_modacks(span_exporter): with mock.patch("time.sleep", return_value=None): dispatcher_._retry_modacks(items) + # Subscribe span wouldn't be ended for modacks. So, end it in the test, so + # that we can export and assert its contents. + opentelemetry_data.end_subscribe_span() spans = span_exporter.get_finished_spans() assert len(spans) == 1 subscribe_span = spans[0] - assert "messaging.gcp_pubsub.result" in subscribe_span.attributes - assert subscribe_span.attributes["messaging.gcp_pubsub.result"] == "modacked" assert len(subscribe_span.events) == 1 assert subscribe_span.events[0].name == "modack end" diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index fd00d1dfa..3219355cd 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -680,11 +680,8 @@ def test_opentelemetry__send_lease_modacks(span_exporter, receipt_modack): assert len(spans) == 3 modack_span, subscribe_span1, subscribe_span2 = spans - assert len(subscribe_span1.events) == 1 - assert subscribe_span1.events[0].name == "modack start" - - assert len(subscribe_span2.events) == 1 - assert subscribe_span2.events[0].name == "modack start" + assert len(subscribe_span1.events) == 0 + assert len(subscribe_span2.events) == 0 assert modack_span.name == "subscriptionID modack" assert modack_span.parent is None From 79369110f7cde003a7a5f6917d8bc0c8989068d1 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 14:26:20 +0000 Subject: [PATCH 46/56] Fix mypy errors --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 12 +++++------- .../subscriber/_protocol/streaming_pull_manager.py | 10 +++++----- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 861fcb5e5..3b2f7918a 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -528,18 +528,16 @@ def _retry_modacks(self, requests_to_retry): ack_reqs_dict = {req.ack_id: req for req in requests_to_retry} - subscription_id: Optional[str] = None - project_id: Optional[str] = None - subscribe_links: List[trace.Link] = [] + subscription_id = None + project_id = None + subscribe_links = [] for ack_req in ack_reqs_dict.values(): if ack_req.opentelemetry_data and math.isclose(ack_req.seconds, 0): if subscription_id is None: subscription_id = ack_req.opentelemetry_data.subscription_id if project_id is None: project_id = ack_req.opentelemetry_data.project_id - subscribe_span: Optional[ - trace.Span - ] = ack_req.opentelemetry_data.subscribe_span + subscribe_span = ack_req.opentelemetry_data.subscribe_span if ( subscribe_span and subscribe_span.get_span_context().trace_flags.sampled @@ -547,7 +545,7 @@ def _retry_modacks(self, requests_to_retry): subscribe_links.append( trace.Link(subscribe_span.get_span_context()) ) - nack_span: Optional[trace.Span] = None + nack_span = None if subscription_id and project_id and len(subscribe_links) > 0: nack_span = start_nack_span( subscription_id, diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index eb6a7a07d..d4bfec6dd 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1057,13 +1057,13 @@ def _send_lease_modacks( with self._exactly_once_enabled_lock: exactly_once_enabled = self._exactly_once_enabled if exactly_once_enabled: - items: List[requests.ModAckRequest] = [] + eod_items: List[requests.ModAckRequest] = [] if self._client.open_telemetry_enabled: for ack_id, data in zip( ack_ids, opentelemetry_data ): # pragma: NO COVER # Identical code covered in the same function below assert data is not None - items.append( + eod_items.append( requests.ModAckRequest( ack_id, ack_deadline, @@ -1072,19 +1072,19 @@ def _send_lease_modacks( ) ) else: - items = [ + eod_items = [ requests.ModAckRequest(ack_id, ack_deadline, futures.Future()) for ack_id in ack_ids ] assert self._dispatcher is not None - self._dispatcher.modify_ack_deadline(items, ack_deadline) + self._dispatcher.modify_ack_deadline(eod_items, ack_deadline) if ( modack_span ): # pragma: NO COVER # Identical code covered in the same function below modack_span.end() expired_ack_ids = set() - for req in items: + for req in eod_items: try: assert req.future is not None req.future.result() From 5874a965c00e9279982880ffdad72a0f73dd8565 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 14:50:29 +0000 Subject: [PATCH 47/56] Add links to subscribe span of the sampled ack_spans in dispatcher.ack --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 9 +++++++++ tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 12 ++++++++++++ 2 files changed, 21 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 3b2f7918a..90afae0d6 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -255,6 +255,7 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: } subscribe_links: List[trace.Link] = [] + subscribe_spans: List[trace.Span] = [] for ack_req in ack_reqs_dict.values(): if ack_req.opentelemetry_data: subscribe_span: Optional[ @@ -267,6 +268,7 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: subscribe_links.append( trace.Link(subscribe_span.get_span_context()) ) + subscribe_spans.append(subscribe_span) ack_span: Optional[trace.Span] = None if subscription_id and project_id: ack_span = start_ack_span( @@ -275,6 +277,13 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: project_id, subscribe_links, ) + if ( + ack_span and ack_span.get_span_context().trace_flags.sampled + ): # pragma: NO COVER + ack_span_context: trace.SpanContext = ack_span.get_span_context() + for subscribe_span in subscribe_spans: + subscribe_span.add_link(ack_span_context) + requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=list(itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE)), ack_reqs_dict=ack_reqs_dict, diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index bf557ea2f..6e124f499 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -407,6 +407,10 @@ def test_opentelemetry_modify_ack_deadline(span_exporter): assert subscribe_span.events[1].name == "modack end" +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) def test_opentelemetry_ack(span_exporter): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True @@ -464,6 +468,14 @@ def test_opentelemetry_ack(span_exporter): assert subscribe_span.events[0].name == "ack start" assert subscribe_span.events[1].name == "ack end" + # This subscribe span is sampled, so we expect it to be linked to the ack + # span. + assert len(spans[1].links) == 1 + assert spans[1].links[0].context == ack_span.context + # This subscribe span is not sampled, so we expect it to not be linked to + # the ack span + assert len(spans[2].links) == 0 + assert ack_span.name == "subscriptionID ack" assert ack_span.kind == trace.SpanKind.CLIENT assert ack_span.parent is None From a7915709dedeb2dd493e2515cc08cff64d2301b3 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 15:19:48 +0000 Subject: [PATCH 48/56] Add links to ack_span in the subscribe_spans in dispatcher.retry_ack() --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 8 ++++++++ tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 90afae0d6..3824aab3d 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -339,6 +339,7 @@ def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): subscription_id: Optional[str] = None project_id: Optional[str] = None subscribe_links: List[trace.Link] = [] + subscribe_spans: List[trace.Span] = [] for req in requests_to_retry: if req.opentelemetry_data: req.opentelemetry_data.add_subscribe_span_event("ack start") @@ -356,6 +357,7 @@ def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): subscribe_links.append( trace.Link(subscribe_span.get_span_context()) ) + subscribe_spans.append(subscribe_span) ack_span: Optional[trace.Span] = None if subscription_id and project_id: ack_span = start_ack_span( @@ -364,6 +366,12 @@ def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): project_id, subscribe_links, ) + if ( + ack_span and ack_span.get_span_context().trace_flags.sampled + ): # pragma: NO COVER + ack_span_context: trace.SpanContext = ack_span.get_span_context() + for subscribe_span in subscribe_spans: + subscribe_span.add_link(ack_span_context) requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=[req.ack_id for req in requests_to_retry], diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 6e124f499..b752ef426 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -604,6 +604,10 @@ def test_retry_acks_in_new_thread(): assert ctor_call.kwargs["daemon"] +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) def test_opentelemetry_retry_acks(span_exporter): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True @@ -664,6 +668,14 @@ def test_opentelemetry_retry_acks(span_exporter): assert subscribe_span.events[0].name == "ack start" assert subscribe_span.events[1].name == "ack end" + # This subscribe span is sampled, so we expect it to be linked to the ack + # span. + assert len(spans[1].links) == 1 + assert spans[1].links[0].context == ack_span.context + # This subscribe span is not sampled, so we expect it to not be linked to + # the ack span + assert len(spans[2].links) == 0 + assert ack_span.name == "subscriptionID ack" assert ack_span.kind == trace.SpanKind.CLIENT assert ack_span.parent is None From 3206c192bf567be0d087c011c20c2f15308b703d Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 15:25:59 +0000 Subject: [PATCH 49/56] Add links to nack spans in the subscribe span in dispatcher.modify_ack_deadlin() --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 8 ++++++++ tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 3824aab3d..a120d9223 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -456,6 +456,7 @@ def modify_ack_deadline( for req in itertools.islice(items_gen, _ACK_IDS_BATCH_SIZE) } subscribe_links: List[trace.Link] = [] + subscribe_spans: List[trace.Span] = [] for ack_req in ack_reqs_dict.values(): if ack_req.opentelemetry_data and math.isclose(ack_req.seconds, 0): subscribe_span: Optional[ @@ -468,6 +469,7 @@ def modify_ack_deadline( subscribe_links.append( trace.Link(subscribe_span.get_span_context()) ) + subscribe_spans.append(subscribe_span) nack_span: Optional[trace.Span] = None if subscription_id and project_id and len(subscribe_links) > 0: nack_span = start_nack_span( @@ -476,6 +478,12 @@ def modify_ack_deadline( project_id, subscribe_links, ) + if ( + nack_span and nack_span.get_span_context().trace_flags.sampled + ): # pragma: NO COVER + nack_span_context: trace.SpanContext = nack_span.get_span_context() + for subscribe_span in subscribe_spans: + subscribe_span.add_link(nack_span_context) requests_to_retry: List[requests.ModAckRequest] requests_completed: Optional[List[requests.ModAckRequest]] = None if default_deadline is None: diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index b752ef426..babf7ecb3 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -945,6 +945,10 @@ def test_drop_ordered_messages(): manager.maybe_resume_consumer.assert_called_once() +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) def test_opentelemetry_nack(span_exporter): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True @@ -1016,6 +1020,14 @@ def test_opentelemetry_nack(span_exporter): assert subscribe_span.events[0].name == "nack start" assert subscribe_span.events[1].name == "nack end" + # This subscribe span is sampled, so we expect it to be linked to the nack + # span. + assert len(spans[1].links) == 1 + assert spans[1].links[0].context == nack_span.context + # This subscribe span is not sampled, so we expect it to not be linked to + # the nack span + assert len(spans[2].links) == 0 + assert nack_span.name == "subscriptionID nack" assert nack_span.kind == trace.SpanKind.CLIENT assert nack_span.parent is None From 14250a1946653dd10dcdacbd52f0204acd6ba61b Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 15:39:15 +0000 Subject: [PATCH 50/56] Add links to nack_spans in subscribe_span in dispatcher.retry_nacks() --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 8 ++++++++ tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 12 ++++++++++++ 2 files changed, 20 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index a120d9223..31ffa2654 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -556,6 +556,7 @@ def _retry_modacks(self, requests_to_retry): subscription_id = None project_id = None subscribe_links = [] + subscribe_spans = [] for ack_req in ack_reqs_dict.values(): if ack_req.opentelemetry_data and math.isclose(ack_req.seconds, 0): if subscription_id is None: @@ -570,6 +571,7 @@ def _retry_modacks(self, requests_to_retry): subscribe_links.append( trace.Link(subscribe_span.get_span_context()) ) + subscribe_spans.append(subscribe_span) nack_span = None if subscription_id and project_id and len(subscribe_links) > 0: nack_span = start_nack_span( @@ -578,6 +580,12 @@ def _retry_modacks(self, requests_to_retry): project_id, subscribe_links, ) + if ( + nack_span and nack_span.get_span_context().trace_flags.sampled + ): # pragma: NO COVER + nack_span_context: trace.SpanContext = nack_span.get_span_context() + for subscribe_span in subscribe_spans: + subscribe_span.add_link(nack_span_context) requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=[req.ack_id for req in requests_to_retry], modify_deadline_seconds=[req.seconds for req in requests_to_retry], diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index babf7ecb3..66d4245fd 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -789,6 +789,10 @@ def test_opentelemetry_retry_modacks(span_exporter): assert subscribe_span.events[0].name == "modack end" +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) def test_opentelemetry_retry_nacks(span_exporter): manager = mock.create_autospec( streaming_pull_manager.StreamingPullManager, instance=True @@ -844,6 +848,14 @@ def test_opentelemetry_retry_nacks(span_exporter): assert len(subscribe_span.events) == 1 assert subscribe_span.events[0].name == "nack end" + # This subscribe span is sampled, so we expect it to be linked to the nack + # span. + assert len(spans[1].links) == 1 + assert spans[1].links[0].context == nack_span.context + # This subscribe span is not sampled, so we expect it to not be linked to + # the nack span + assert len(spans[2].links) == 0 + assert nack_span.name == "subscriptionID nack" assert nack_span.kind == trace.SpanKind.CLIENT assert nack_span.parent is None From 815f2a37b59455e9b9537beddb3a3a1df390f409 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 15:50:53 +0000 Subject: [PATCH 51/56] Add attributes to the links from subscribe_span to ack_span --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 14 ++++++++++++-- tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 31ffa2654..3a02eeb66 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -282,7 +282,12 @@ def ack(self, items: Sequence[requests.AckRequest]) -> None: ): # pragma: NO COVER ack_span_context: trace.SpanContext = ack_span.get_span_context() for subscribe_span in subscribe_spans: - subscribe_span.add_link(ack_span_context) + subscribe_span.add_link( + context=ack_span_context, + attributes={ + "messaging.operation.name": "ack", + }, + ) requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=list(itertools.islice(ack_ids_gen, _ACK_IDS_BATCH_SIZE)), @@ -371,7 +376,12 @@ def _retry_acks(self, requests_to_retry: List[requests.AckRequest]): ): # pragma: NO COVER ack_span_context: trace.SpanContext = ack_span.get_span_context() for subscribe_span in subscribe_spans: - subscribe_span.add_link(ack_span_context) + subscribe_span.add_link( + context=ack_span_context, + attributes={ + "messaging.operation.name": "ack", + }, + ) requests_completed, requests_to_retry = self._manager.send_unary_ack( ack_ids=[req.ack_id for req in requests_to_retry], diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 66d4245fd..0eea28df8 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -472,6 +472,8 @@ def test_opentelemetry_ack(span_exporter): # span. assert len(spans[1].links) == 1 assert spans[1].links[0].context == ack_span.context + assert len(spans[1].links[0].attributes) == 1 + assert spans[1].links[0].attributes["messaging.operation.name"] == "ack" # This subscribe span is not sampled, so we expect it to not be linked to # the ack span assert len(spans[2].links) == 0 @@ -672,6 +674,8 @@ def test_opentelemetry_retry_acks(span_exporter): # span. assert len(spans[1].links) == 1 assert spans[1].links[0].context == ack_span.context + assert len(spans[1].links[0].attributes) == 1 + assert spans[1].links[0].attributes["messaging.operation.name"] == "ack" # This subscribe span is not sampled, so we expect it to not be linked to # the ack span assert len(spans[2].links) == 0 From 94feeec64132cd41771fe1a881eea59479034e29 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 15:56:34 +0000 Subject: [PATCH 52/56] Add attributes to links in subscribe_spans to nack_spans --- .../pubsub_v1/subscriber/_protocol/dispatcher.py | 14 ++++++++++++-- tests/unit/pubsub_v1/subscriber/test_dispatcher.py | 4 ++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 3a02eeb66..081c2ebe4 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -493,7 +493,12 @@ def modify_ack_deadline( ): # pragma: NO COVER nack_span_context: trace.SpanContext = nack_span.get_span_context() for subscribe_span in subscribe_spans: - subscribe_span.add_link(nack_span_context) + subscribe_span.add_link( + context=nack_span_context, + attributes={ + "messaging.operation.name": "nack", + }, + ) requests_to_retry: List[requests.ModAckRequest] requests_completed: Optional[List[requests.ModAckRequest]] = None if default_deadline is None: @@ -595,7 +600,12 @@ def _retry_modacks(self, requests_to_retry): ): # pragma: NO COVER nack_span_context: trace.SpanContext = nack_span.get_span_context() for subscribe_span in subscribe_spans: - subscribe_span.add_link(nack_span_context) + subscribe_span.add_link( + context=nack_span_context, + attributes={ + "messaging.operation.name": "nack", + }, + ) requests_completed, requests_to_retry = self._manager.send_unary_modack( modify_deadline_ack_ids=[req.ack_id for req in requests_to_retry], modify_deadline_seconds=[req.seconds for req in requests_to_retry], diff --git a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py index 0eea28df8..5483c48c5 100644 --- a/tests/unit/pubsub_v1/subscriber/test_dispatcher.py +++ b/tests/unit/pubsub_v1/subscriber/test_dispatcher.py @@ -856,6 +856,8 @@ def test_opentelemetry_retry_nacks(span_exporter): # span. assert len(spans[1].links) == 1 assert spans[1].links[0].context == nack_span.context + assert len(spans[1].links[0].attributes) == 1 + assert spans[1].links[0].attributes["messaging.operation.name"] == "nack" # This subscribe span is not sampled, so we expect it to not be linked to # the nack span assert len(spans[2].links) == 0 @@ -1040,6 +1042,8 @@ def test_opentelemetry_nack(span_exporter): # span. assert len(spans[1].links) == 1 assert spans[1].links[0].context == nack_span.context + assert len(spans[1].links[0].attributes) == 1 + assert spans[1].links[0].attributes["messaging.operation.name"] == "nack" # This subscribe span is not sampled, so we expect it to not be linked to # the nack span assert len(spans[2].links) == 0 From 5dd8fb8c513df998872aad1e9e8c1d1a85d56ba7 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 16:07:29 +0000 Subject: [PATCH 53/56] Add links to modack spans in subscribe span --- .../subscriber/_protocol/streaming_pull_manager.py | 13 +++++++++++++ .../subscriber/test_streaming_pull_manager.py | 13 +++++++++++++ 2 files changed, 26 insertions(+) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index d4bfec6dd..cc398cf5c 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1032,6 +1032,7 @@ def _send_lease_modacks( modack_span: Optional[trace.Span] = None if self._client.open_telemetry_enabled: subscribe_span_links: List[trace.Link] = [] + subscribe_spans: List[trace.Span] = [] assert len(self._subscription.split("/")) == 4 subscription_id: str = self._subscription.split("/")[3] project_id: str = self._subscription.split("/")[1] @@ -1044,6 +1045,7 @@ def _send_lease_modacks( subscribe_span_links.append( trace.Link(subscribe_span.get_span_context()) ) + subscribe_spans.append(subscribe_span) modack_span = start_modack_span( subscribe_span_links, subscription_id, @@ -1053,6 +1055,17 @@ def _send_lease_modacks( "_send_lease_modacks", receipt_modack, ) + if ( + modack_span and modack_span.get_span_context().trace_flags.sampled + ): # pragma: NO COVER + modack_span_context: trace.SpanContext = modack_span.get_span_context() + for subscribe_span in subscribe_spans: + subscribe_span.add_link( + context=modack_span_context, + attributes={ + "messaging.operation.name": "modack", + }, + ) with self._exactly_once_enabled_lock: exactly_once_enabled = self._exactly_once_enabled diff --git a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py index 3219355cd..4d2d1b98e 100644 --- a/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py +++ b/tests/unit/pubsub_v1/subscriber/test_streaming_pull_manager.py @@ -632,6 +632,10 @@ def test__maybe_release_messages_negative_on_hold_bytes_warning(caplog): assert manager._on_hold_bytes == 0 # should be auto-corrected +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) @pytest.mark.parametrize( "receipt_modack", [ @@ -683,6 +687,11 @@ def test_opentelemetry__send_lease_modacks(span_exporter, receipt_modack): assert len(subscribe_span1.events) == 0 assert len(subscribe_span2.events) == 0 + assert len(subscribe_span1.links) == 0 + assert len(subscribe_span2.links) == 1 + assert subscribe_span2.links[0].context == modack_span.context + assert subscribe_span2.links[0].attributes["messaging.operation.name"] == "modack" + assert modack_span.name == "subscriptionID modack" assert modack_span.parent is None assert modack_span.kind == trace.SpanKind.CLIENT @@ -2760,6 +2769,10 @@ def test_process_requests_mixed_success_and_failure_modacks(): assert future3.result() == subscriber_exceptions.AcknowledgeStatus.SUCCESS +@pytest.mark.skipif( + sys.version_info < (3, 8), + reason="Open Telemetry not supported below Python version 3.8", +) def test_opentelemetry__on_response_subscribe_span_create(span_exporter): manager, _, _, leaser, _, _ = make_running_manager( enable_open_telemetry=True, From b4fdd4018862eedb35e19d4aecfb9a67d9366617 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 21:21:08 +0000 Subject: [PATCH 54/56] Create nack span even if there are no sampled subscribe span --- google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py index 081c2ebe4..fe3771432 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/dispatcher.py @@ -481,7 +481,7 @@ def modify_ack_deadline( ) subscribe_spans.append(subscribe_span) nack_span: Optional[trace.Span] = None - if subscription_id and project_id and len(subscribe_links) > 0: + if subscription_id and project_id: nack_span = start_nack_span( subscription_id, len(ack_reqs_dict), @@ -588,7 +588,7 @@ def _retry_modacks(self, requests_to_retry): ) subscribe_spans.append(subscribe_span) nack_span = None - if subscription_id and project_id and len(subscribe_links) > 0: + if subscription_id and project_id: nack_span = start_nack_span( subscription_id, len(ack_reqs_dict), From ad13b593fcff3d29fe6b532419e42259690155b7 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 21:47:33 +0000 Subject: [PATCH 55/56] Add a note stating that OpenTelemetry traces are subject to change --- google/cloud/pubsub_v1/types.py | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/google/cloud/pubsub_v1/types.py b/google/cloud/pubsub_v1/types.py index 342ffeb4f..7e94a7250 100644 --- a/google/cloud/pubsub_v1/types.py +++ b/google/cloud/pubsub_v1/types.py @@ -144,7 +144,14 @@ class SubscriberOptions(NamedTuple): """ enable_open_telemetry_tracing: bool = False - """Whether to enable OpenTelemetry tracing.""" + """ + Whether to enable OpenTelemetry tracing. + + Warning: traces are subject to change. The name and attributes of a span might + change without notice. Only use run traces interactively. Don't use in + automation. Running non-interactive traces can cause problems if the underlying + trace architecture changes without notice. + """ # Define the default publisher options. @@ -191,7 +198,14 @@ class PublisherOptions(NamedTuple): ) enable_open_telemetry_tracing: bool = False # disabled by default - """Open Telemetry tracing is enabled if this is set to True.""" + """ + Open Telemetry tracing is enabled if this is set to True. + + Warning: traces are subject to change. The name and attributes of a span might + change without notice. Only use run traces interactively. Don't use in + automation. Running non-interactive traces can cause problems if the underlying + trace architecture changes without notice. + """ # Define the type class and default values for flow control settings. From 3977a6af557bfab9f7f752c4a1e0e3f348f4c293 Mon Sep 17 00:00:00 2001 From: mukund-ananthu Date: Tue, 24 Sep 2024 22:07:26 +0000 Subject: [PATCH 56/56] Split the subscription name at one place * Addressing the review comment --- .../pubsub_v1/open_telemetry/subscribe_opentelemetry.py | 7 ++++--- .../subscriber/_protocol/streaming_pull_manager.py | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py index 18d8d0a5b..88870be60 100644 --- a/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py +++ b/google/cloud/pubsub_v1/open_telemetry/subscribe_opentelemetry.py @@ -85,9 +85,10 @@ def start_subscribe_span( getter=OpenTelemetryContextGetter(), ) self._publisher_create_span_context = parent_span_context - assert len(subscription.split("/")) == 4 - subscription_short_name = subscription.split("/")[3] - self._project_id = subscription.split("/")[1] + split_subscription: List[str] = subscription.split("/") + assert len(split_subscription) == 4 + subscription_short_name = split_subscription[3] + self._project_id = split_subscription[1] self._subscription_id = subscription_short_name with tracer.start_as_current_span( name=f"{subscription_short_name} subscribe", diff --git a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py index cc398cf5c..4c9e1c20e 100644 --- a/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py +++ b/google/cloud/pubsub_v1/subscriber/_protocol/streaming_pull_manager.py @@ -1033,9 +1033,10 @@ def _send_lease_modacks( if self._client.open_telemetry_enabled: subscribe_span_links: List[trace.Link] = [] subscribe_spans: List[trace.Span] = [] - assert len(self._subscription.split("/")) == 4 - subscription_id: str = self._subscription.split("/")[3] - project_id: str = self._subscription.split("/")[1] + subscription_split: List[str] = self._subscription.split("/") + assert len(subscription_split) == 4 + subscription_id: str = subscription_split[3] + project_id: str = subscription_split[1] for data in opentelemetry_data: subscribe_span: Optional[trace.Span] = data.subscribe_span if (