diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f42337d5c5b..61e98378017 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -11,10 +11,6 @@ repos: - id: trailing-whitespace - id: end-of-file-fixer - id: check-toml - - repo: https://github.com/pre-commit/pygrep-hooks - rev: v1.5.1 - hooks: - - id: python-use-type-annotations - repo: local hooks: - id: black diff --git a/CHANGELOG.md b/CHANGELOG.md index adc3a14aad1..c8808f15cc6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,56 @@ This project follows [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) fo ## [Unreleased] + +## 1.23.0 - 2021-12-20 + +### Bug Fixes + +* **apigateway:** allow list of HTTP methods in route method ([#838](https://github.com/awslabs/aws-lambda-powertools-python/issues/838)) +* **event-sources:** pass authorizer data to APIGatewayEventAuthorizer ([#897](https://github.com/awslabs/aws-lambda-powertools-python/issues/897)) +* **event-sources:** handle claimsOverrideDetails set to null ([#878](https://github.com/awslabs/aws-lambda-powertools-python/issues/878)) +* **idempotency:** include decorated fn name in hash ([#869](https://github.com/awslabs/aws-lambda-powertools-python/issues/869)) +* **metrics:** explicit type to single_metric ctx manager ([#865](https://github.com/awslabs/aws-lambda-powertools-python/issues/865)) +* **parameters:** mypy appconfig transform and return types ([#877](https://github.com/awslabs/aws-lambda-powertools-python/issues/877)) +* **parser:** mypy overload parse when using envelope ([#885](https://github.com/awslabs/aws-lambda-powertools-python/issues/885)) +* **parser:** kinesis sequence number is str, not int ([#907](https://github.com/awslabs/aws-lambda-powertools-python/issues/907)) +* **parser:** mypy support for payload type override as models ([#883](https://github.com/awslabs/aws-lambda-powertools-python/issues/883)) +* **tracer:** add warm start annotation (ColdStart=False) ([#851](https://github.com/awslabs/aws-lambda-powertools-python/issues/851)) + +### Documentation + +* **nav**: reference cloudformation custom resource helper (CRD) ([#914](https://github.com/awslabs/aws-lambda-powertools-python/issues/914)) +* add new public Slack invite +* disable search blur in non-prod env +* update Lambda Layers version +* **apigateway:** add new not_found feature ([#915](https://github.com/awslabs/aws-lambda-powertools-python/issues/915)) +* **apigateway:** fix sample layout provided ([#864](https://github.com/awslabs/aws-lambda-powertools-python/issues/864)) +* **appsync:** fix users.py typo to locations [#830](https://github.com/awslabs/aws-lambda-powertools-python/issues/830) +* **lambda_layer:** fix CDK layer syntax + +### Features + +* **apigateway:** add exception_handler support ([#898](https://github.com/awslabs/aws-lambda-powertools-python/issues/898)) +* **apigateway:** access parent api resolver from router ([#842](https://github.com/awslabs/aws-lambda-powertools-python/issues/842)) +* **batch:** new BatchProcessor for SQS, DynamoDB, Kinesis ([#886](https://github.com/awslabs/aws-lambda-powertools-python/issues/886)) +* **logger:** allow handler with custom kwargs signature ([#913](https://github.com/awslabs/aws-lambda-powertools-python/issues/913)) +* **tracer:** add service annotation when service is set 
([#861](https://github.com/awslabs/aws-lambda-powertools-python/issues/861))
+
+### Maintenance
+
+* minor housekeeping before release ([#912](https://github.com/awslabs/aws-lambda-powertools-python/issues/912))
+* correct pr label order
+* **ci:** split latest docs workflow
+* **deps:** bump fastjsonschema from 2.15.1 to 2.15.2 ([#891](https://github.com/awslabs/aws-lambda-powertools-python/issues/891))
+* **deps:** bump actions/setup-python from 2.2.2 to 2.3.0 ([#831](https://github.com/awslabs/aws-lambda-powertools-python/issues/831))
+* **deps:** support arm64 when developing locally ([#862](https://github.com/awslabs/aws-lambda-powertools-python/issues/862))
+* **deps:** bump actions/setup-python from 2.3.0 to 2.3.1 ([#852](https://github.com/awslabs/aws-lambda-powertools-python/issues/852))
+* **deps:** bump aws-xray-sdk from 2.8.0 to 2.9.0 ([#876](https://github.com/awslabs/aws-lambda-powertools-python/issues/876))
+* **deps-dev:** bump mypy from 0.910 to 0.920 ([#903](https://github.com/awslabs/aws-lambda-powertools-python/issues/903))
+* **deps-dev:** bump flake8 from 3.9.2 to 4.0.1 ([#789](https://github.com/awslabs/aws-lambda-powertools-python/issues/789))
+* **deps-dev:** bump black from 21.10b0 to 21.11b1 ([#839](https://github.com/awslabs/aws-lambda-powertools-python/issues/839))
+* **deps-dev:** bump black from 21.11b1 to 21.12b0 ([#872](https://github.com/awslabs/aws-lambda-powertools-python/issues/872))
+
 ## 1.22.0 - 2021-11-17

 Tenet update! We've updated **Idiomatic** tenet to **Progressive** to reflect the new Router feature in Event Handler, and more importantly the new wave of customers coming from SRE, Data Analysis, and Data Science background.
diff --git a/README.md b/README.md index d7796e27af5..46c5db5ad6c 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ With [pip](https://pip.pypa.io/en/latest/index.html) installed, run: ``pip insta * [Serverless Shopping cart](https://github.com/aws-samples/aws-serverless-shopping-cart) * [Serverless Airline](https://github.com/aws-samples/aws-serverless-airline-booking) * [Serverless E-commerce platform](https://github.com/aws-samples/aws-serverless-ecommerce-platform) +* [Serverless GraphQL Nanny Booking Api](https://github.com/trey-rosius/babysitter_api) ## Credits diff --git a/aws_lambda_powertools/event_handler/api_gateway.py b/aws_lambda_powertools/event_handler/api_gateway.py index b3d77df24b4..30c13ada6b5 100644 --- a/aws_lambda_powertools/event_handler/api_gateway.py +++ b/aws_lambda_powertools/event_handler/api_gateway.py @@ -10,10 +10,10 @@ from enum import Enum from functools import partial from http import HTTPStatus -from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Union +from typing import Any, Callable, Dict, List, Optional, Set, Tuple, Type, Union from aws_lambda_powertools.event_handler import content_types -from aws_lambda_powertools.event_handler.exceptions import ServiceError +from aws_lambda_powertools.event_handler.exceptions import NotFoundError, ServiceError from aws_lambda_powertools.shared import constants from aws_lambda_powertools.shared.functions import resolve_truthy_env_var_choice from aws_lambda_powertools.shared.json_encoder import Encoder @@ -27,7 +27,6 @@ _SAFE_URI = "-._~()'!*:@,;" # https://www.ietf.org/rfc/rfc3986.txt # API GW/ALB decode non-safe URI chars; we must support them too _UNSAFE_URI = "%<>\[\]{}|^" # noqa: W605 - _NAMED_GROUP_BOUNDARY_PATTERN = fr"(?P\1[{_SAFE_URI}{_UNSAFE_URI}\\w]+)" @@ -435,6 +434,7 @@ def __init__( self._proxy_type = proxy_type self._routes: List[Route] = [] self._route_keys: List[str] = [] + self._exception_handlers: Dict[Type, Callable] = {} self._cors = cors self._cors_enabled: bool = cors is not None self._cors_methods: Set[str] = {"OPTIONS"} @@ -579,7 +579,7 @@ def _remove_prefix(self, path: str) -> str: @staticmethod def _path_starts_with(path: str, prefix: str): """Returns true if the `path` starts with a prefix plus a `/`""" - if not isinstance(prefix, str) or len(prefix) == 0: + if not isinstance(prefix, str) or prefix == "": return False return path.startswith(prefix + "/") @@ -596,6 +596,10 @@ def _not_found(self, method: str) -> ResponseBuilder: headers["Access-Control-Allow-Methods"] = ",".join(sorted(self._cors_methods)) return ResponseBuilder(Response(status_code=204, content_type=None, headers=headers, body=None)) + handler = self._lookup_exception_handler(NotFoundError) + if handler: + return ResponseBuilder(handler(NotFoundError())) + return ResponseBuilder( Response( status_code=HTTPStatus.NOT_FOUND.value, @@ -609,16 +613,11 @@ def _call_route(self, route: Route, args: Dict[str, str]) -> ResponseBuilder: """Actually call the matching route with any provided keyword arguments.""" try: return ResponseBuilder(self._to_response(route.func(**args)), route) - except ServiceError as e: - return ResponseBuilder( - Response( - status_code=e.status_code, - content_type=content_types.APPLICATION_JSON, - body=self._json_dump({"statusCode": e.status_code, "message": e.msg}), - ), - route, - ) - except Exception: + except Exception as exc: + response_builder = self._call_exception_handler(exc, route) + if response_builder: + return response_builder + if 
self._debug: # If the user has turned on debug mode, # we'll let the original exception propagate so @@ -628,10 +627,48 @@ def _call_route(self, route: Route, args: Dict[str, str]) -> ResponseBuilder: status_code=500, content_type=content_types.TEXT_PLAIN, body="".join(traceback.format_exc()), - ) + ), + route, ) + raise + def not_found(self, func: Optional[Callable] = None): + if func is None: + return self.exception_handler(NotFoundError) + return self.exception_handler(NotFoundError)(func) + + def exception_handler(self, exc_class: Type[Exception]): + def register_exception_handler(func: Callable): + self._exception_handlers[exc_class] = func + + return register_exception_handler + + def _lookup_exception_handler(self, exp_type: Type) -> Optional[Callable]: + # Use "Method Resolution Order" to allow for matching against a base class + # of an exception + for cls in exp_type.__mro__: + if cls in self._exception_handlers: + return self._exception_handlers[cls] + return None + + def _call_exception_handler(self, exp: Exception, route: Route) -> Optional[ResponseBuilder]: + handler = self._lookup_exception_handler(type(exp)) + if handler: + return ResponseBuilder(handler(exp), route) + + if isinstance(exp, ServiceError): + return ResponseBuilder( + Response( + status_code=exp.status_code, + content_type=content_types.APPLICATION_JSON, + body=self._json_dump({"statusCode": exp.status_code, "message": exp.msg}), + ), + route, + ) + + return None + def _to_response(self, result: Union[Dict, Response]) -> Response: """Convert the route's result to a Response diff --git a/aws_lambda_powertools/logging/logger.py b/aws_lambda_powertools/logging/logger.py index 0b9b52f8824..7ca5a18e6f4 100644 --- a/aws_lambda_powertools/logging/logger.py +++ b/aws_lambda_powertools/logging/logger.py @@ -328,7 +328,7 @@ def handler(event, context): ) @functools.wraps(lambda_handler) - def decorate(event, context): + def decorate(event, context, **kwargs): lambda_context = build_lambda_context_model(context) cold_start = _is_cold_start() diff --git a/aws_lambda_powertools/shared/functions.py b/aws_lambda_powertools/shared/functions.py index 0b117cc32bb..11c4e4ce77c 100644 --- a/aws_lambda_powertools/shared/functions.py +++ b/aws_lambda_powertools/shared/functions.py @@ -1,7 +1,23 @@ -from distutils.util import strtobool from typing import Any, Optional, Union +def strtobool(value: str) -> bool: + """Convert a string representation of truth to True or False. + + True values are 'y', 'yes', 't', 'true', 'on', and '1'; false values + are 'n', 'no', 'f', 'false', 'off', and '0'. Raises ValueError if + 'value' is anything else. + + > note:: Copied from distutils.util. 
+ """ + value = value.lower() + if value in ("y", "yes", "t", "true", "on", "1"): + return True + if value in ("n", "no", "f", "false", "off", "0"): + return False + raise ValueError(f"invalid truth value {value!r}") + + def resolve_truthy_env_var_choice(env: str, choice: Optional[bool] = None) -> bool: """Pick explicit choice over truthy env value, if available, otherwise return truthy env value diff --git a/aws_lambda_powertools/tracing/tracer.py b/aws_lambda_powertools/tracing/tracer.py index 70580663e7b..200325cfe9a 100644 --- a/aws_lambda_powertools/tracing/tracer.py +++ b/aws_lambda_powertools/tracing/tracer.py @@ -5,7 +5,7 @@ import logging import numbers import os -from typing import Any, Callable, Dict, Optional, Sequence, Union, cast, overload +from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast, overload from ..shared import constants from ..shared.functions import resolve_env_var_choice, resolve_truthy_env_var_choice @@ -758,7 +758,7 @@ def _patch_xray_provider(self): # Due to Lazy Import, we need to activate `core` attrib via import # we also need to include `patch`, `patch_all` methods # to ensure patch calls are done via the provider - from aws_xray_sdk.core import xray_recorder + from aws_xray_sdk.core import xray_recorder # type: ignore provider = xray_recorder provider.patch = aws_xray_sdk.core.patch @@ -778,3 +778,27 @@ def _disable_xray_trace_batching(self): def _is_xray_provider(self): return "aws_xray_sdk" in self.provider.__module__ + + def ignore_endpoint(self, hostname: Optional[str] = None, urls: Optional[List[str]] = None): + """If you want to ignore certain httplib requests you can do so based on the hostname or URL that is being + requested. + + > NOTE: If the provider is not xray, nothing will be added to ignore list + + Documentation + -------------- + - https://github.com/aws/aws-xray-sdk-python#ignoring-httplib-requests + + Parameters + ---------- + hostname : Optional, str + The hostname is matched using the Python fnmatch library which does Unix glob style matching. + urls: Optional, List[str] + List of urls to ignore. 
Example `tracer.ignore_endpoint(urls=["/ignored-url"])` + """ + if not self._is_xray_provider(): + return + + from aws_xray_sdk.ext.httplib import add_ignored # type: ignore + + add_ignored(hostname=hostname, urls=urls) diff --git a/aws_lambda_powertools/utilities/batch/__init__.py b/aws_lambda_powertools/utilities/batch/__init__.py index d308a56abda..463f6f7fbff 100644 --- a/aws_lambda_powertools/utilities/batch/__init__.py +++ b/aws_lambda_powertools/utilities/batch/__init__.py @@ -4,7 +4,25 @@ Batch processing utility """ -from .base import BasePartialProcessor, batch_processor -from .sqs import PartialSQSProcessor, sqs_batch_processor +from aws_lambda_powertools.utilities.batch.base import ( + BasePartialProcessor, + BatchProcessor, + EventType, + FailureResponse, + SuccessResponse, + batch_processor, +) +from aws_lambda_powertools.utilities.batch.exceptions import ExceptionInfo +from aws_lambda_powertools.utilities.batch.sqs import PartialSQSProcessor, sqs_batch_processor -__all__ = ("BasePartialProcessor", "PartialSQSProcessor", "batch_processor", "sqs_batch_processor") +__all__ = ( + "BatchProcessor", + "BasePartialProcessor", + "ExceptionInfo", + "EventType", + "FailureResponse", + "PartialSQSProcessor", + "SuccessResponse", + "batch_processor", + "sqs_batch_processor", +) diff --git a/aws_lambda_powertools/utilities/batch/base.py b/aws_lambda_powertools/utilities/batch/base.py index a0ad18a9ec1..d8fdc2d85f2 100644 --- a/aws_lambda_powertools/utilities/batch/base.py +++ b/aws_lambda_powertools/utilities/batch/base.py @@ -3,25 +3,62 @@ """ Batch processing utilities """ - +import copy import logging +import sys from abc import ABC, abstractmethod -from typing import Any, Callable, Dict, List, Tuple +from enum import Enum +from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union, overload from aws_lambda_powertools.middleware_factory import lambda_handler_decorator +from aws_lambda_powertools.utilities.batch.exceptions import BatchProcessingError, ExceptionInfo +from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord +from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord +from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord logger = logging.getLogger(__name__) +class EventType(Enum): + SQS = "SQS" + KinesisDataStreams = "KinesisDataStreams" + DynamoDBStreams = "DynamoDBStreams" + + +# +# type specifics +# +has_pydantic = "pydantic" in sys.modules + +# For IntelliSense and Mypy to work, we need to account for possible SQS, Kinesis and DynamoDB subclasses +# We need them as subclasses as we must access their message ID or sequence number metadata via dot notation +if has_pydantic: + from aws_lambda_powertools.utilities.parser.models import DynamoDBStreamRecordModel + from aws_lambda_powertools.utilities.parser.models import KinesisDataStreamRecord as KinesisDataStreamRecordModel + from aws_lambda_powertools.utilities.parser.models import SqsRecordModel + + BatchTypeModels = Optional[ + Union[Type[SqsRecordModel], Type[DynamoDBStreamRecordModel], Type[KinesisDataStreamRecordModel]] + ] + +# When using processor with default arguments, records will carry EventSourceDataClassTypes +# and depending on what EventType it's passed it'll correctly map to the right record +# When using Pydantic Models, it'll accept any subclass from SQS, DynamoDB and Kinesis +EventSourceDataClassTypes = Union[SQSRecord, KinesisStreamRecord, DynamoDBRecord] +BatchEventTypes = 
Union[EventSourceDataClassTypes, "BatchTypeModels"] +SuccessResponse = Tuple[str, Any, BatchEventTypes] +FailureResponse = Tuple[str, str, BatchEventTypes] + + class BasePartialProcessor(ABC): """ Abstract class for batch processors. """ def __init__(self): - self.success_messages: List = [] - self.fail_messages: List = [] - self.exceptions: List = [] + self.success_messages: List[BatchEventTypes] = [] + self.fail_messages: List[BatchEventTypes] = [] + self.exceptions: List[ExceptionInfo] = [] @abstractmethod def _prepare(self): @@ -38,7 +75,7 @@ def _clean(self): raise NotImplementedError() @abstractmethod - def _process_record(self, record: Any): + def _process_record(self, record: dict): """ Process record with handler. """ @@ -57,13 +94,13 @@ def __enter__(self): def __exit__(self, exception_type, exception_value, traceback): self._clean() - def __call__(self, records: List[Any], handler: Callable): + def __call__(self, records: List[dict], handler: Callable): """ Set instance attributes before execution Parameters ---------- - records: List[Any] + records: List[dict] List with objects to be processed. handler: Callable Callable to process "records" entries. @@ -72,26 +109,40 @@ def __call__(self, records: List[Any], handler: Callable): self.handler = handler return self - def success_handler(self, record: Any, result: Any): + def success_handler(self, record, result: Any) -> SuccessResponse: """ - Success callback + Keeps track of batch records that were processed successfully + + Parameters + ---------- + record: Any + record that failed processing + result: Any + result from record handler Returns ------- - tuple + SuccessResponse "success", result, original record """ entry = ("success", result, record) self.success_messages.append(record) return entry - def failure_handler(self, record: Any, exception: Tuple): + def failure_handler(self, record, exception: ExceptionInfo) -> FailureResponse: """ - Failure callback + Keeps track of batch records that failed processing + + Parameters + ---------- + record: Any + record that failed processing + exception: ExceptionInfo + Exception information containing type, value, and traceback (sys.exc_info()) Returns ------- - tuple + FailureResponse "fail", exceptions args, original record """ exception_string = f"{exception[0]}:{exception[1]}" @@ -146,3 +197,239 @@ def batch_processor( processor.process() return handler(event, context) + + +class BatchProcessor(BasePartialProcessor): + """Process native partial responses from SQS, Kinesis Data Streams, and DynamoDB. + + + Example + ------- + + ## Process batch triggered by SQS + + ```python + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.SQS) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: SQSRecord): + payload: str = record.body + if payload: + item: dict = json.loads(payload) + ... 
+ + @logger.inject_lambda_context + @tracer.capture_lambda_handler + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context: LambdaContext): + return processor.response() + ``` + + ## Process batch triggered by Kinesis Data Streams + + ```python + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.KinesisDataStreams) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: KinesisStreamRecord): + logger.info(record.kinesis.data_as_text) + payload: dict = record.kinesis.data_as_json() + ... + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context: LambdaContext): + return processor.response() + ``` + + + ## Process batch triggered by DynamoDB Data Streams + + ```python + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: DynamoDBRecord): + logger.info(record.dynamodb.new_image) + payload: dict = json.loads(record.dynamodb.new_image.get("item").s_value) + # alternatively: + # changes: Dict[str, dynamo_db_stream_event.AttributeValue] = record.dynamodb.new_image # noqa: E800 + # payload = change.get("Message").raw_event -> {"S": ""} + ... 
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    def lambda_handler(event, context: LambdaContext):
+        batch = event["Records"]
+        with processor(records=batch, handler=record_handler):
+            processed_messages = processor.process()  # kick off processing, return list[tuple]
+
+        return processor.response()
+    ```
+
+
+    Raises
+    ------
+    BatchProcessingError
+        When all batch records fail processing
+    """
+
+    DEFAULT_RESPONSE: Dict[str, List[Optional[dict]]] = {"batchItemFailures": []}
+
+    def __init__(self, event_type: EventType, model: Optional["BatchTypeModels"] = None):
+        """Process batch and partially report failed items
+
+        Parameters
+        ----------
+        event_type: EventType
+            Whether this is a SQS, DynamoDB Streams, or Kinesis Data Stream event
+        model: Optional["BatchTypeModels"]
+            Parser's data model using either SqsRecordModel, DynamoDBStreamRecordModel, KinesisDataStreamRecord
+
+        Exceptions
+        ----------
+        BatchProcessingError
+            Raised when the entire batch has failed processing
+        """
+        self.event_type = event_type
+        self.model = model
+        self.batch_response = copy.deepcopy(self.DEFAULT_RESPONSE)
+        self._COLLECTOR_MAPPING = {
+            EventType.SQS: self._collect_sqs_failures,
+            EventType.KinesisDataStreams: self._collect_kinesis_failures,
+            EventType.DynamoDBStreams: self._collect_dynamodb_failures,
+        }
+        self._DATA_CLASS_MAPPING = {
+            EventType.SQS: SQSRecord,
+            EventType.KinesisDataStreams: KinesisStreamRecord,
+            EventType.DynamoDBStreams: DynamoDBRecord,
+        }
+
+        super().__init__()
+
+    def response(self):
+        """Batch items that failed processing, if any"""
+        return self.batch_response
+
+    def _prepare(self):
+        """
+        Remove results from previous execution.
+        """
+        self.success_messages.clear()
+        self.fail_messages.clear()
+        self.batch_response = copy.deepcopy(self.DEFAULT_RESPONSE)
+
+    def _process_record(self, record: dict) -> Union[SuccessResponse, FailureResponse]:
+        """
+        Process a record with instance's handler
+
+        Parameters
+        ----------
+        record: dict
+            A batch record to be processed.
+        """
+        data = self._to_batch_type(record=record, event_type=self.event_type, model=self.model)
+        try:
+            result = self.handler(record=data)
+            return self.success_handler(record=record, result=result)
+        except Exception:
+            return self.failure_handler(record=data, exception=sys.exc_info())
+
+    def _clean(self):
+        """
+        Report messages to be deleted in case of partial failure.
+        """
+
+        if not self._has_messages_to_report():
+            return
+
+        if self._entire_batch_failed():
+            raise BatchProcessingError(
+                msg=f"All records failed processing. {len(self.exceptions)} individual errors logged "
+                f"separately below.",
+                child_exceptions=self.exceptions,
+            )
+
+        messages = self._get_messages_to_report()
+        self.batch_response = {"batchItemFailures": [messages]}
+
+    def _has_messages_to_report(self) -> bool:
+        if self.fail_messages:
+            return True
+
+        logger.debug(f"All {len(self.success_messages)} records successfully processed")
+        return False
+
+    def _entire_batch_failed(self) -> bool:
+        return len(self.exceptions) == len(self.records)
+
+    def _get_messages_to_report(self) -> Dict[str, str]:
+        """
+        Format messages to use in batch deletion
+        """
+        return self._COLLECTOR_MAPPING[self.event_type]()
+
+    # Event Source Data Classes follow python idioms for fields
+    # while Parser/Pydantic follows the event field names to the letter
+    def _collect_sqs_failures(self):
+        if self.model:
+            return {"itemIdentifier": msg.messageId for msg in self.fail_messages}
+        return {"itemIdentifier": msg.message_id for msg in self.fail_messages}
+
+    def _collect_kinesis_failures(self):
+        if self.model:
+            # Pydantic model uses int but Lambda poller expects str
+            return {"itemIdentifier": msg.kinesis.sequenceNumber for msg in self.fail_messages}
+        return {"itemIdentifier": msg.kinesis.sequence_number for msg in self.fail_messages}
+
+    def _collect_dynamodb_failures(self):
+        if self.model:
+            return {"itemIdentifier": msg.dynamodb.SequenceNumber for msg in self.fail_messages}
+        return {"itemIdentifier": msg.dynamodb.sequence_number for msg in self.fail_messages}
+
+    @overload
+    def _to_batch_type(self, record: dict, event_type: EventType, model: "BatchTypeModels") -> "BatchTypeModels":
+        ...  # pragma: no cover
+
+    @overload
+    def _to_batch_type(self, record: dict, event_type: EventType) -> EventSourceDataClassTypes:
+        ...  # pragma: no cover
+
+    def _to_batch_type(self, record: dict, event_type: EventType, model: Optional["BatchTypeModels"] = None):
+        if model is not None:
+            return model.parse_obj(record)
+        return self._DATA_CLASS_MAPPING[event_type](record)
diff --git a/aws_lambda_powertools/utilities/batch/exceptions.py b/aws_lambda_powertools/utilities/batch/exceptions.py
index c2ead04a7b1..dc4ca300c7c 100644
--- a/aws_lambda_powertools/utilities/batch/exceptions.py
+++ b/aws_lambda_powertools/utilities/batch/exceptions.py
@@ -2,20 +2,19 @@
 """
 Batch processing exceptions
 """
 import traceback
+from types import TracebackType
+from typing import List, Optional, Tuple, Type
+ExceptionInfo = Tuple[Type[BaseException], BaseException, TracebackType]
-class SQSBatchProcessingError(Exception):
-    """When at least one message within a batch could not be processed"""
-    def __init__(self, msg="", child_exceptions=()):
+class BaseBatchProcessingError(Exception):
+    def __init__(self, msg="", child_exceptions: Optional[List[ExceptionInfo]] = None):
         super().__init__(msg)
         self.msg = msg
         self.child_exceptions = child_exceptions
-    # Overriding this method so we can output all child exception tracebacks when we raise this exception to prevent
-    # errors being lost.
See https://github.com/awslabs/aws-lambda-powertools-python/issues/275 - def __str__(self): - parent_exception_str = super(SQSBatchProcessingError, self).__str__() + def format_exceptions(self, parent_exception_str): exception_list = [f"{parent_exception_str}\n"] for exception in self.child_exceptions: extype, ex, tb = exception @@ -23,3 +22,27 @@ def __str__(self): exception_list.append(formatted) return "\n".join(exception_list) + + +class SQSBatchProcessingError(BaseBatchProcessingError): + """When at least one message within a batch could not be processed""" + + def __init__(self, msg="", child_exceptions: Optional[List[ExceptionInfo]] = None): + super().__init__(msg, child_exceptions) + + # Overriding this method so we can output all child exception tracebacks when we raise this exception to prevent + # errors being lost. See https://github.com/awslabs/aws-lambda-powertools-python/issues/275 + def __str__(self): + parent_exception_str = super(SQSBatchProcessingError, self).__str__() + return self.format_exceptions(parent_exception_str) + + +class BatchProcessingError(BaseBatchProcessingError): + """When all batch records failed to be processed""" + + def __init__(self, msg="", child_exceptions: Optional[List[ExceptionInfo]] = None): + super().__init__(msg, child_exceptions) + + def __str__(self): + parent_exception_str = super(BatchProcessingError, self).__str__() + return self.format_exceptions(parent_exception_str) diff --git a/aws_lambda_powertools/utilities/data_classes/active_mq_event.py b/aws_lambda_powertools/utilities/data_classes/active_mq_event.py index 058a6a6ecf4..09981bdcdd2 100644 --- a/aws_lambda_powertools/utilities/data_classes/active_mq_event.py +++ b/aws_lambda_powertools/utilities/data_classes/active_mq_event.py @@ -27,7 +27,9 @@ def decoded_data(self) -> str: @property def json_data(self) -> Any: """Parses the data as json""" - return json.loads(self.decoded_data) + if self._json_data is None: + self._json_data = json.loads(self.decoded_data) + return self._json_data @property def connection_id(self) -> str: diff --git a/aws_lambda_powertools/utilities/data_classes/api_gateway_proxy_event.py b/aws_lambda_powertools/utilities/data_classes/api_gateway_proxy_event.py index 34ac8d83993..adce2d4b11b 100644 --- a/aws_lambda_powertools/utilities/data_classes/api_gateway_proxy_event.py +++ b/aws_lambda_powertools/utilities/data_classes/api_gateway_proxy_event.py @@ -11,11 +11,22 @@ class APIGatewayEventAuthorizer(DictWrapper): @property def claims(self) -> Optional[Dict[str, Any]]: - return self["requestContext"]["authorizer"].get("claims") + return self.get("claims") @property def scopes(self) -> Optional[List[str]]: - return self["requestContext"]["authorizer"].get("scopes") + return self.get("scopes") + + @property + def principal_id(self) -> Optional[str]: + """The principal user identification associated with the token sent by the client and returned from an + API Gateway Lambda authorizer (formerly known as a custom authorizer)""" + return self.get("principalId") + + @property + def integration_latency(self) -> Optional[int]: + """The authorizer latency in ms.""" + return self.get("integrationLatency") class APIGatewayEventRequestContext(BaseRequestContext): @@ -56,7 +67,7 @@ def route_key(self) -> Optional[str]: @property def authorizer(self) -> APIGatewayEventAuthorizer: - return APIGatewayEventAuthorizer(self._data) + return APIGatewayEventAuthorizer(self._data["requestContext"]["authorizer"]) class APIGatewayProxyEvent(BaseProxyEvent): diff --git 
a/aws_lambda_powertools/utilities/data_classes/code_pipeline_job_event.py b/aws_lambda_powertools/utilities/data_classes/code_pipeline_job_event.py index e13d32fb169..e17bd13807c 100644 --- a/aws_lambda_powertools/utilities/data_classes/code_pipeline_job_event.py +++ b/aws_lambda_powertools/utilities/data_classes/code_pipeline_job_event.py @@ -23,7 +23,9 @@ def user_parameters(self) -> str: @property def decoded_user_parameters(self) -> Dict[str, Any]: """Json Decoded user parameters""" - return json.loads(self.user_parameters) + if self._json_data is None: + self._json_data = json.loads(self.user_parameters) + return self._json_data class CodePipelineActionConfiguration(DictWrapper): diff --git a/aws_lambda_powertools/utilities/data_classes/cognito_user_pool_event.py b/aws_lambda_powertools/utilities/data_classes/cognito_user_pool_event.py index 954d3d15b5f..df2726ee722 100644 --- a/aws_lambda_powertools/utilities/data_classes/cognito_user_pool_event.py +++ b/aws_lambda_powertools/utilities/data_classes/cognito_user_pool_event.py @@ -687,7 +687,7 @@ def session(self) -> List[ChallengeResult]: @property def client_metadata(self) -> Optional[Dict[str, str]]: """One or more key-value pairs that you can provide as custom input to the Lambda function that you - specify for the create auth challenge trigger..""" + specify for the create auth challenge trigger.""" return self["request"].get("clientMetadata") diff --git a/aws_lambda_powertools/utilities/data_classes/common.py b/aws_lambda_powertools/utilities/data_classes/common.py index 566e1c56259..45f6bafc957 100644 --- a/aws_lambda_powertools/utilities/data_classes/common.py +++ b/aws_lambda_powertools/utilities/data_classes/common.py @@ -8,6 +8,7 @@ class DictWrapper: def __init__(self, data: Dict[str, Any]): self._data = data + self._json_data: Optional[Any] = None def __getitem__(self, key: str) -> Any: return self._data[key] @@ -18,8 +19,8 @@ def __eq__(self, other: Any) -> bool: return self._data == other._data - def get(self, key: str) -> Optional[Any]: - return self._data.get(key) + def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]: + return self._data.get(key, default) @property def raw_event(self) -> Dict[str, Any]: @@ -37,7 +38,7 @@ def get_header_value( name_lower = name.lower() return next( - # Iterate over the dict and do a case insensitive key comparison + # Iterate over the dict and do a case-insensitive key comparison (value for key, value in headers.items() if key.lower() == name_lower), # Default value is returned if no matches was found default_value, @@ -65,7 +66,9 @@ def body(self) -> Optional[str]: @property def json_body(self) -> Any: """Parses the submitted body as json""" - return json.loads(self.decoded_body) + if self._json_data is None: + self._json_data = json.loads(self.decoded_body) + return self._json_data @property def decoded_body(self) -> str: @@ -113,7 +116,7 @@ def get_header_value( default_value: str, optional Default value if no value was found by name case_sensitive: bool - Whether to use a case sensitive look up + Whether to use a case-sensitive look up Returns ------- str, optional diff --git a/aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py b/aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py index 1ec3d6157bf..01d892f9edc 100644 --- a/aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py +++ b/aws_lambda_powertools/utilities/data_classes/dynamo_db_stream_event.py @@ -106,14 +106,13 @@ def ns_value(self) -> 
Optional[List[str]]: return self.get("NS") @property - def null_value(self) -> Optional[bool]: + def null_value(self) -> None: """An attribute of type Null. Example: >>> {"NULL": True} """ - item = self.get("NULL") - return None if item is None else bool(item) + return None @property def s_value(self) -> Optional[str]: diff --git a/aws_lambda_powertools/utilities/data_classes/rabbit_mq_event.py b/aws_lambda_powertools/utilities/data_classes/rabbit_mq_event.py index 7676e6ff9b5..0822a58da18 100644 --- a/aws_lambda_powertools/utilities/data_classes/rabbit_mq_event.py +++ b/aws_lambda_powertools/utilities/data_classes/rabbit_mq_event.py @@ -88,7 +88,9 @@ def decoded_data(self) -> str: @property def json_data(self) -> Any: """Parses the data as json""" - return json.loads(self.decoded_data) + if self._json_data is None: + self._json_data = json.loads(self.decoded_data) + return self._json_data class RabbitMQEvent(DictWrapper): diff --git a/aws_lambda_powertools/utilities/idempotency/base.py b/aws_lambda_powertools/utilities/idempotency/base.py index 7dee94fc356..dddc36b426d 100644 --- a/aws_lambda_powertools/utilities/idempotency/base.py +++ b/aws_lambda_powertools/utilities/idempotency/base.py @@ -21,6 +21,23 @@ logger = logging.getLogger(__name__) +def _prepare_data(data: Any) -> Any: + """Prepare data for json serialization. + + We will convert Python dataclasses, pydantic models or event source data classes to a dict, + otherwise return data as-is. + """ + if hasattr(data, "__dataclass_fields__"): + import dataclasses + + return dataclasses.asdict(data) + + if callable(getattr(data, "dict", None)): + return data.dict() + + return getattr(data, "raw_event", data) + + class IdempotencyHandler: """ Base class to orchestrate calls to persistence layer. 
@@ -52,7 +69,7 @@ def __init__( Function keyword arguments """ self.function = function - self.data = function_payload + self.data = _prepare_data(function_payload) self.fn_args = function_args self.fn_kwargs = function_kwargs diff --git a/aws_lambda_powertools/utilities/idempotency/exceptions.py b/aws_lambda_powertools/utilities/idempotency/exceptions.py index 6c7318ebca0..e114ab57e8d 100644 --- a/aws_lambda_powertools/utilities/idempotency/exceptions.py +++ b/aws_lambda_powertools/utilities/idempotency/exceptions.py @@ -47,5 +47,5 @@ class IdempotencyPersistenceLayerError(Exception): class IdempotencyKeyError(Exception): """ - Payload does not contain a idempotent key + Payload does not contain an idempotent key """ diff --git a/aws_lambda_powertools/utilities/idempotency/idempotency.py b/aws_lambda_powertools/utilities/idempotency/idempotency.py index 6984cfbbd8e..42b8052fd32 100644 --- a/aws_lambda_powertools/utilities/idempotency/idempotency.py +++ b/aws_lambda_powertools/utilities/idempotency/idempotency.py @@ -7,7 +7,7 @@ from typing import Any, Callable, Dict, Optional, cast from aws_lambda_powertools.middleware_factory import lambda_handler_decorator -from aws_lambda_powertools.shared.constants import IDEMPOTENCY_DISABLED_ENV +from aws_lambda_powertools.shared import constants from aws_lambda_powertools.shared.types import AnyCallableT from aws_lambda_powertools.utilities.idempotency.base import IdempotencyHandler from aws_lambda_powertools.utilities.idempotency.config import IdempotencyConfig @@ -58,7 +58,7 @@ def idempotent( >>> return {"StatusCode": 200} """ - if os.getenv(IDEMPOTENCY_DISABLED_ENV): + if os.getenv(constants.IDEMPOTENCY_DISABLED_ENV): return handler(event, context) config = config or IdempotencyConfig() @@ -127,7 +127,7 @@ def process_order(customer_id: str, order: dict, **kwargs): @functools.wraps(function) def decorate(*args, **kwargs): - if os.getenv(IDEMPOTENCY_DISABLED_ENV): + if os.getenv(constants.IDEMPOTENCY_DISABLED_ENV): return function(*args, **kwargs) payload = kwargs.get(data_keyword_argument) diff --git a/aws_lambda_powertools/utilities/idempotency/persistence/base.py b/aws_lambda_powertools/utilities/idempotency/persistence/base.py index 8f2b30d289a..b07662e6432 100644 --- a/aws_lambda_powertools/utilities/idempotency/persistence/base.py +++ b/aws_lambda_powertools/utilities/idempotency/persistence/base.py @@ -1,7 +1,6 @@ """ Persistence layers supporting idempotency """ - import datetime import hashlib import json @@ -226,7 +225,6 @@ def _generate_hash(self, data: Any) -> str: Hashed representation of the provided data """ - data = getattr(data, "raw_event", data) # could be a data class depending on decorator order hashed_data = self.hash_function(json.dumps(data, cls=Encoder, sort_keys=True).encode()) return hashed_data.hexdigest() diff --git a/aws_lambda_powertools/utilities/parser/envelopes/base.py b/aws_lambda_powertools/utilities/parser/envelopes/base.py index 06e78160d87..85486fdd876 100644 --- a/aws_lambda_powertools/utilities/parser/envelopes/base.py +++ b/aws_lambda_powertools/utilities/parser/envelopes/base.py @@ -2,7 +2,7 @@ from abc import ABC, abstractmethod from typing import Any, Dict, Optional, Type, TypeVar, Union -from ..types import Model +from aws_lambda_powertools.utilities.parser.types import Model logger = logging.getLogger(__name__) diff --git a/aws_lambda_powertools/utilities/parser/models/alb.py b/aws_lambda_powertools/utilities/parser/models/alb.py index d4ea5fde2a1..1112d0c04e4 100644 --- 
a/aws_lambda_powertools/utilities/parser/models/alb.py +++ b/aws_lambda_powertools/utilities/parser/models/alb.py @@ -1,7 +1,9 @@ -from typing import Dict +from typing import Dict, Union from pydantic import BaseModel +from aws_lambda_powertools.utilities.parser.types import Model + class AlbRequestContextData(BaseModel): targetGroupArn: str @@ -14,7 +16,7 @@ class AlbRequestContext(BaseModel): class AlbModel(BaseModel): httpMethod: str path: str - body: str + body: Union[str, Model] isBase64Encoded: bool headers: Dict[str, str] queryStringParameters: Dict[str, str] diff --git a/aws_lambda_powertools/utilities/parser/models/apigw.py b/aws_lambda_powertools/utilities/parser/models/apigw.py index 283a73da9c3..ce519b8e0e3 100644 --- a/aws_lambda_powertools/utilities/parser/models/apigw.py +++ b/aws_lambda_powertools/utilities/parser/models/apigw.py @@ -1,10 +1,10 @@ from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, root_validator from pydantic.networks import IPvAnyNetwork -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model class ApiGatewayUserCertValidity(BaseModel): @@ -89,4 +89,4 @@ class APIGatewayProxyEventModel(BaseModel): pathParameters: Optional[Dict[str, str]] stageVariables: Optional[Dict[str, str]] isBase64Encoded: bool - body: Optional[str] + body: Optional[Union[str, Model]] diff --git a/aws_lambda_powertools/utilities/parser/models/apigwv2.py b/aws_lambda_powertools/utilities/parser/models/apigwv2.py index 36dd85b907e..ddaf2d7ef82 100644 --- a/aws_lambda_powertools/utilities/parser/models/apigwv2.py +++ b/aws_lambda_powertools/utilities/parser/models/apigwv2.py @@ -1,10 +1,10 @@ from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field from pydantic.networks import IPvAnyNetwork -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model class RequestContextV2AuthorizerIamCognito(BaseModel): @@ -67,5 +67,5 @@ class APIGatewayProxyEventV2Model(BaseModel): pathParameters: Optional[Dict[str, str]] stageVariables: Optional[Dict[str, str]] requestContext: RequestContextV2 - body: Optional[str] + body: Optional[Union[str, Model]] isBase64Encoded: bool diff --git a/aws_lambda_powertools/utilities/parser/models/cloudwatch.py b/aws_lambda_powertools/utilities/parser/models/cloudwatch.py index a0fd3e37239..9b954ec3b13 100644 --- a/aws_lambda_powertools/utilities/parser/models/cloudwatch.py +++ b/aws_lambda_powertools/utilities/parser/models/cloudwatch.py @@ -3,17 +3,19 @@ import logging import zlib from datetime import datetime -from typing import List +from typing import List, Union from pydantic import BaseModel, Field, validator +from aws_lambda_powertools.utilities.parser.types import Model + logger = logging.getLogger(__name__) class CloudWatchLogsLogEvent(BaseModel): id: str # noqa AA03 VNE003 timestamp: datetime - message: str + message: Union[str, Model] class CloudWatchLogsDecode(BaseModel): diff --git a/aws_lambda_powertools/utilities/parser/models/dynamodb.py b/aws_lambda_powertools/utilities/parser/models/dynamodb.py index e7e3094bc9b..fe7514bada0 100644 --- a/aws_lambda_powertools/utilities/parser/models/dynamodb.py +++ b/aws_lambda_powertools/utilities/parser/models/dynamodb.py @@ -1,16 +1,16 @@ from datetime import date -from typing import Any, Dict, List, Optional 
+from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model class DynamoDBStreamChangedRecordModel(BaseModel): ApproximateCreationDateTime: Optional[date] Keys: Dict[str, Dict[str, Any]] - NewImage: Optional[Dict[str, Any]] - OldImage: Optional[Dict[str, Any]] + NewImage: Optional[Union[Dict[str, Any], Model]] + OldImage: Optional[Union[Dict[str, Any], Model]] SequenceNumber: str SizeBytes: int StreamViewType: Literal["NEW_AND_OLD_IMAGES", "KEYS_ONLY", "NEW_IMAGE", "OLD_IMAGE"] diff --git a/aws_lambda_powertools/utilities/parser/models/event_bridge.py b/aws_lambda_powertools/utilities/parser/models/event_bridge.py index a94daef0d4e..f98a263c680 100644 --- a/aws_lambda_powertools/utilities/parser/models/event_bridge.py +++ b/aws_lambda_powertools/utilities/parser/models/event_bridge.py @@ -1,8 +1,10 @@ from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Union from pydantic import BaseModel, Field +from aws_lambda_powertools.utilities.parser.types import Model + class EventBridgeModel(BaseModel): version: str @@ -13,5 +15,5 @@ class EventBridgeModel(BaseModel): region: str resources: List[str] detail_type: str = Field(None, alias="detail-type") - detail: Dict[str, Any] + detail: Union[Dict[str, Any], Model] replay_name: Optional[str] = Field(None, alias="replay-name") diff --git a/aws_lambda_powertools/utilities/parser/models/kinesis.py b/aws_lambda_powertools/utilities/parser/models/kinesis.py index 8979d3f102f..1c7c31c97b4 100644 --- a/aws_lambda_powertools/utilities/parser/models/kinesis.py +++ b/aws_lambda_powertools/utilities/parser/models/kinesis.py @@ -1,12 +1,11 @@ import base64 import logging from binascii import Error as BinAsciiError -from typing import List +from typing import List, Union from pydantic import BaseModel, validator -from pydantic.types import PositiveInt -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model logger = logging.getLogger(__name__) @@ -14,8 +13,8 @@ class KinesisDataStreamRecordPayload(BaseModel): kinesisSchemaVersion: str partitionKey: str - sequenceNumber: PositiveInt - data: bytes # base64 encoded str is parsed into bytes + sequenceNumber: str + data: Union[bytes, Model] # base64 encoded str is parsed into bytes approximateArrivalTimestamp: float @validator("data", pre=True, allow_reuse=True) diff --git a/aws_lambda_powertools/utilities/parser/models/s3.py b/aws_lambda_powertools/utilities/parser/models/s3.py index 4ec6a717f58..ae06c9f889a 100644 --- a/aws_lambda_powertools/utilities/parser/models/s3.py +++ b/aws_lambda_powertools/utilities/parser/models/s3.py @@ -6,7 +6,7 @@ from pydantic.networks import IPvAnyNetwork from pydantic.types import NonNegativeFloat -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal class S3EventRecordGlacierRestoreEventData(BaseModel): diff --git a/aws_lambda_powertools/utilities/parser/models/s3_object_event.py b/aws_lambda_powertools/utilities/parser/models/s3_object_event.py index 1fc10672746..778786bc8cb 100644 --- a/aws_lambda_powertools/utilities/parser/models/s3_object_event.py +++ b/aws_lambda_powertools/utilities/parser/models/s3_object_event.py @@ -1,7 +1,9 @@ -from typing import Dict, Optional +from typing import Dict, Optional, Union from pydantic import BaseModel, HttpUrl +from aws_lambda_powertools.utilities.parser.types 
import Model + class S3ObjectContext(BaseModel): inputS3Url: HttpUrl @@ -12,7 +14,7 @@ class S3ObjectContext(BaseModel): class S3ObjectConfiguration(BaseModel): accessPointArn: str supportingAccessPointArn: str - payload: str + payload: Union[str, Model] class S3ObjectUserRequest(BaseModel): diff --git a/aws_lambda_powertools/utilities/parser/models/sns.py b/aws_lambda_powertools/utilities/parser/models/sns.py index 856757c5464..cdcd9549a98 100644 --- a/aws_lambda_powertools/utilities/parser/models/sns.py +++ b/aws_lambda_powertools/utilities/parser/models/sns.py @@ -1,10 +1,10 @@ from datetime import datetime -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from pydantic import BaseModel, root_validator from pydantic.networks import HttpUrl -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model class SnsMsgAttributeModel(BaseModel): @@ -18,7 +18,7 @@ class SnsNotificationModel(BaseModel): UnsubscribeUrl: HttpUrl Type: Literal["Notification"] MessageAttributes: Optional[Dict[str, SnsMsgAttributeModel]] - Message: str + Message: Union[str, Model] MessageId: str SigningCertUrl: HttpUrl Signature: str diff --git a/aws_lambda_powertools/utilities/parser/models/sqs.py b/aws_lambda_powertools/utilities/parser/models/sqs.py index 184b1734f02..47871ab8840 100644 --- a/aws_lambda_powertools/utilities/parser/models/sqs.py +++ b/aws_lambda_powertools/utilities/parser/models/sqs.py @@ -1,9 +1,9 @@ from datetime import datetime -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Union from pydantic import BaseModel -from ..types import Literal +from aws_lambda_powertools.utilities.parser.types import Literal, Model class SqsAttributesModel(BaseModel): @@ -52,7 +52,7 @@ class SqsMsgAttributeModel(BaseModel): class SqsRecordModel(BaseModel): messageId: str receiptHandle: str - body: str + body: Union[str, Model] attributes: SqsAttributesModel messageAttributes: Dict[str, SqsMsgAttributeModel] md5OfBody: str diff --git a/aws_lambda_powertools/utilities/parser/parser.py b/aws_lambda_powertools/utilities/parser/parser.py index 7a7f6aec56b..ef939cd11f7 100644 --- a/aws_lambda_powertools/utilities/parser/parser.py +++ b/aws_lambda_powertools/utilities/parser/parser.py @@ -1,25 +1,23 @@ import logging -from typing import Any, Callable, Dict, Optional, Type, TypeVar, Union +from typing import Any, Callable, Dict, Optional, Type, overload + +from aws_lambda_powertools.utilities.parser.types import EnvelopeModel, EventParserReturnType, Model from ...middleware_factory import lambda_handler_decorator from ..typing import LambdaContext from .envelopes.base import Envelope from .exceptions import InvalidEnvelopeError, InvalidModelTypeError -from .types import Model logger = logging.getLogger(__name__) -EventParserReturnType = TypeVar("EventParserReturnType") - - @lambda_handler_decorator def event_parser( handler: Callable[[Any, LambdaContext], EventParserReturnType], event: Dict[str, Any], context: LambdaContext, model: Type[Model], - envelope: Optional[Union[Envelope, Type[Envelope]]] = None, + envelope: Optional[Type[Envelope]] = None, ) -> EventParserReturnType: """Lambda handler decorator to parse & validate events using Pydantic models @@ -81,14 +79,22 @@ def handler(event: Order, context: LambdaContext): InvalidEnvelopeError When envelope given does not implement BaseEnvelope """ - parsed_event = parse(event=event, model=model, envelope=envelope) + parsed_event = parse(event=event, 
model=model, envelope=envelope) if envelope else parse(event=event, model=model) logger.debug(f"Calling handler {handler.__name__}") return handler(parsed_event, context) -def parse( - event: Dict[str, Any], model: Type[Model], envelope: Optional[Union[Envelope, Type[Envelope]]] = None -) -> Model: +@overload +def parse(event: Dict[str, Any], model: Type[Model]) -> Model: + ... # pragma: no cover + + +@overload +def parse(event: Dict[str, Any], model: Type[Model], envelope: Type[Envelope]) -> EnvelopeModel: + ... # pragma: no cover + + +def parse(event: Dict[str, Any], model: Type[Model], envelope: Optional[Type[Envelope]] = None): """Standalone function to parse & validate events using Pydantic models Typically used when you need fine-grained control over error handling compared to event_parser decorator. diff --git a/aws_lambda_powertools/utilities/parser/types.py b/aws_lambda_powertools/utilities/parser/types.py index 2565e52c764..20958bd9c21 100644 --- a/aws_lambda_powertools/utilities/parser/types.py +++ b/aws_lambda_powertools/utilities/parser/types.py @@ -12,3 +12,5 @@ from typing_extensions import Literal # noqa: F401 Model = TypeVar("Model", bound=BaseModel) +EnvelopeModel = TypeVar("EnvelopeModel") +EventParserReturnType = TypeVar("EventParserReturnType") diff --git a/docs/core/event_handler/api_gateway.md b/docs/core/event_handler/api_gateway.md index 1f5fa4479c0..2e5c8af532a 100644 --- a/docs/core/event_handler/api_gateway.md +++ b/docs/core/event_handler/api_gateway.md @@ -22,48 +22,46 @@ You must have an existing [API Gateway Proxy integration](https://docs.aws.amazo This is the sample infrastructure for API Gateway we are using for the examples in this documentation. -=== "template.yml" - - ```yaml - AWSTemplateFormatVersion: '2010-09-09' - Transform: AWS::Serverless-2016-10-31 - Description: Hello world event handler API Gateway - - Globals: - Api: - TracingEnabled: true - Cors: # see CORS section - AllowOrigin: "'https://example.com'" - AllowHeaders: "'Content-Type,Authorization,X-Amz-Date'" - MaxAge: "'300'" - BinaryMediaTypes: # see Binary responses section - - '*~1*' # converts to */* for any binary type - Function: - Timeout: 5 - Runtime: python3.8 - Tracing: Active - Environment: - Variables: - LOG_LEVEL: INFO - POWERTOOLS_LOGGER_SAMPLE_RATE: 0.1 - POWERTOOLS_LOGGER_LOG_EVENT: true - POWERTOOLS_METRICS_NAMESPACE: MyServerlessApplication - POWERTOOLS_SERVICE_NAME: my_api-service - - Resources: - ApiFunction: - Type: AWS::Serverless::Function - Properties: - Handler: app.lambda_handler - CodeUri: api_handler/ - Description: API handler function - Events: - ApiEvent: - Type: Api - Properties: - Path: /{proxy+} # Send requests on any path to the lambda function - Method: ANY # Send requests using any http method to the lambda function - ``` +```yaml title="AWS Serverless Application Model (SAM) example" +AWSTemplateFormatVersion: '2010-09-09' +Transform: AWS::Serverless-2016-10-31 +Description: Hello world event handler API Gateway + +Globals: + Api: + TracingEnabled: true + Cors: # see CORS section + AllowOrigin: "'https://example.com'" + AllowHeaders: "'Content-Type,Authorization,X-Amz-Date'" + MaxAge: "'300'" + BinaryMediaTypes: # see Binary responses section + - '*~1*' # converts to */* for any binary type + Function: + Timeout: 5 + Runtime: python3.8 + Tracing: Active + Environment: + Variables: + LOG_LEVEL: INFO + POWERTOOLS_LOGGER_SAMPLE_RATE: 0.1 + POWERTOOLS_LOGGER_LOG_EVENT: true + POWERTOOLS_METRICS_NAMESPACE: MyServerlessApplication + POWERTOOLS_SERVICE_NAME: 
my_api-service + +Resources: + ApiFunction: + Type: AWS::Serverless::Function + Properties: + Handler: app.lambda_handler + CodeUri: api_handler/ + Description: API handler function + Events: + ApiEvent: + Type: Api + Properties: + Path: /{proxy+} # Send requests on any path to the lambda function + Method: ANY # Send requests using any http method to the lambda function +``` ### API Gateway decorator @@ -71,7 +69,8 @@ You can define your functions to match a path and HTTP method, when you use the Here's an example where we have two separate functions to resolve two paths: `/hello`. -!!! info "We automatically serialize `Dict` responses as JSON, trim whitespaces for compact responses, and set content-type to `application/json`" +???+ info + We automatically serialize `Dict` responses as JSON, trim whitespaces for compact responses, and set content-type to `application/json`. === "app.py" @@ -181,55 +180,51 @@ Here's an example where we have two separate functions to resolve two paths: `/h When using API Gateway HTTP API to front your Lambda functions, you can instruct `ApiGatewayResolver` to conform with their contract via `proxy_type` param: -=== "app.py" - - ```python hl_lines="3 7" - from aws_lambda_powertools import Logger, Tracer - from aws_lambda_powertools.logging import correlation_paths - from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, ProxyEventType +```python hl_lines="3 7" title="Using HTTP API resolver" +from aws_lambda_powertools import Logger, Tracer +from aws_lambda_powertools.logging import correlation_paths +from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, ProxyEventType - tracer = Tracer() - logger = Logger() - app = ApiGatewayResolver(proxy_type=ProxyEventType.APIGatewayProxyEventV2) +tracer = Tracer() +logger = Logger() +app = ApiGatewayResolver(proxy_type=ProxyEventType.APIGatewayProxyEventV2) - @app.get("/hello") - @tracer.capture_method - def get_hello_universe(): - return {"message": "hello universe"} +@app.get("/hello") +@tracer.capture_method +def get_hello_universe(): + return {"message": "hello universe"} - # You can continue to use other utilities just as before - @logger.inject_lambda_context(correlation_id_path=correlation_paths.API_GATEWAY_HTTP) - @tracer.capture_lambda_handler - def lambda_handler(event, context): - return app.resolve(event, context) - ``` +# You can continue to use other utilities just as before +@logger.inject_lambda_context(correlation_id_path=correlation_paths.API_GATEWAY_HTTP) +@tracer.capture_lambda_handler +def lambda_handler(event, context): + return app.resolve(event, context) +``` #### ALB When using ALB to front your Lambda functions, you can instruct `ApiGatewayResolver` to conform with their contract via `proxy_type` param: -=== "app.py" +```python hl_lines="3 7" title="Using ALB resolver" +from aws_lambda_powertools import Logger, Tracer +from aws_lambda_powertools.logging import correlation_paths +from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, ProxyEventType - ```python hl_lines="3 7" - from aws_lambda_powertools import Logger, Tracer - from aws_lambda_powertools.logging import correlation_paths - from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, ProxyEventType +tracer = Tracer() +logger = Logger() +app = ApiGatewayResolver(proxy_type=ProxyEventType.ALBEvent) - tracer = Tracer() - logger = Logger() - app = ApiGatewayResolver(proxy_type=ProxyEventType.ALBEvent) +@app.get("/hello") 
+@tracer.capture_method
+def get_hello_universe():
+    return {"message": "hello universe"}

-    @app.get("/hello")
-    @tracer.capture_method
-    def get_hello_universe():
-        return {"message": "hello universe"}
-
-    # You can continue to use other utilities just as before
-    @logger.inject_lambda_context(correlation_id_path=correlation_paths.APPLICATION_LOAD_BALANCER)
-    @tracer.capture_lambda_handler
-    def lambda_handler(event, context):
-        return app.resolve(event, context)
-    ```
+# You can continue to use other utilities just as before
+@logger.inject_lambda_context(correlation_id_path=correlation_paths.APPLICATION_LOAD_BALANCER)
+@tracer.capture_lambda_handler
+def lambda_handler(event, context):
+    return app.resolve(event, context)
+```

### Dynamic routes

@@ -309,13 +304,15 @@ You can also nest paths as configured earlier in [our sample infrastructure](#re

#### Catch-all routes

-!!! note "We recommend having explicit routes whenever possible; use catch-all routes sparingly"
+???+ note
+    We recommend having explicit routes whenever possible; use catch-all routes sparingly.

You can use a regex string to handle an arbitrary number of paths within a request, for example `.+`.

You can also combine nested paths with greedy regex to catch in between routes.

-!!! warning "We will choose the more explicit registered route that match incoming event"
+???+ warning
+    We will choose the more explicit registered route that matches the incoming event.

=== "app.py"

@@ -421,8 +418,8 @@ HTTP methods.
    }
    ```

-!!! note "It is usually better to have separate functions for each HTTP method, as the functionality tends to differ
-depending on which method is used."
+???+ note
+    It is usually better to have separate functions for each HTTP method, as the functionality tends to differ depending on which method is used.

### Accessing request details

@@ -436,105 +433,183 @@ Within `app.current_event` property, you can access query strings as dictionary

You can access the raw payload via `body` property, or if it's a JSON string you can quickly deserialize it via `json_body` property.

-=== "app.py"
-
-    ```python hl_lines="7-9 11"
-    from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver
+```python hl_lines="7-9 11" title="Accessing query strings, JSON payload, and raw payload"
+from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver

-    app = ApiGatewayResolver()
+app = ApiGatewayResolver()

-    @app.get("/hello")
-    def get_hello_you():
-        query_strings_as_dict = app.current_event.query_string_parameters
-        json_payload = app.current_event.json_body
-        payload = app.current_event.body
+@app.get("/hello")
+def get_hello_you():
+    query_strings_as_dict = app.current_event.query_string_parameters
+    json_payload = app.current_event.json_body
+    payload = app.current_event.body

-        name = app.current_event.get_query_string_value(name="name", default_value="")
-        return {"message": f"hello {name}}"}
+    name = app.current_event.get_query_string_value(name="name", default_value="")
+    return {"message": f"hello {name}"}

-    def lambda_handler(event, context):
-        return app.resolve(event, context)
-    ```
+def lambda_handler(event, context):
+    return app.resolve(event, context)
+```

#### Headers

Similarly to [Query strings](#query-strings-and-payload), you can access headers as dictionary via `app.current_event.headers`, or by name via `get_header_value`.
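Header name lookups are case-insensitive by default. The sketch below also shows the optional `case_sensitive` parameter — an assumption about the event data class API rather than something documented on this page:

```python title="Case-insensitive header lookups (sketch)"
from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver

app = ApiGatewayResolver()

@app.get("/hello")
def get_hello_you():
    # matches x-api-key, X-Api-Key, X-API-KEY, etc.
    api_key = app.current_event.get_header_value(name="X-Api-Key", default_value="")

    # assumed parameter: only match the exact casing of the header name
    trace_id = app.current_event.get_header_value(name="X-Amzn-Trace-Id", case_sensitive=True)

    return {"api_key_present": bool(api_key), "trace_id": trace_id}

def lambda_handler(event, context):
    return app.resolve(event, context)
```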
-=== "app.py"
+```python hl_lines="7-8" title="Accessing HTTP Headers"
+from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver

-    ```python hl_lines="7-8"
-    from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver
+app = ApiGatewayResolver()

-    app = ApiGatewayResolver()
+@app.get("/hello")
+def get_hello_you():
+    headers_as_dict = app.current_event.headers
+    name = app.current_event.get_header_value(name="X-Name", default_value="")

-    @app.get("/hello")
-    def get_hello_you():
-        headers_as_dict = app.current_event.headers
-        name = app.current_event.get_header_value(name="X-Name", default_value="")
+    return {"message": f"hello {name}"}

-        return {"message": f"hello {name}}"}
+def lambda_handler(event, context):
+    return app.resolve(event, context)
+```

-    def lambda_handler(event, context):
-        return app.resolve(event, context)
-    ```

-### Raising HTTP errors
+### Handling not found routes

-You can easily raise any HTTP Error back to the client using `ServiceError` exception.
+By default, we return `404` for any unmatched route.

-!!! info "If you need to send custom headers, use [Response](#fine-grained-responses) class instead."
+You can use **`not_found`** decorator to override this behaviour, and return a custom **`Response`**.

-Additionally, we provide pre-defined errors for the most popular ones such as HTTP 400, 401, 404, 500.
+```python hl_lines="11 13 16" title="Handling not found"
+from aws_lambda_powertools import Logger, Tracer
+from aws_lambda_powertools.logging import correlation_paths
+from aws_lambda_powertools.event_handler import content_types
+from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, Response
+from aws_lambda_powertools.event_handler.exceptions import NotFoundError

-=== "app.py"
+tracer = Tracer()
+logger = Logger()
+app = ApiGatewayResolver()

-    ```python hl_lines="4-10 20 25 30 35 39"
-    from aws_lambda_powertools import Logger, Tracer
-    from aws_lambda_powertools.logging import correlation_paths
-    from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver
-    from aws_lambda_powertools.event_handler.exceptions import (
-        BadRequestError,
-        InternalServerError,
-        NotFoundError,
-        ServiceError,
-        UnauthorizedError,
-    )
+@app.not_found
+@tracer.capture_method
+def handle_not_found_errors(exc: NotFoundError) -> Response:
+    # Return 418 upon 404 errors
+    logger.info(f"Not found route: {app.current_event.path}")
+    return Response(
+        status_code=418,
+        content_type=content_types.TEXT_PLAIN,
+        body="I'm a teapot!"
+    )

-    tracer = Tracer()
-    logger = Logger()
-    app = ApiGatewayResolver()
+@app.get("/catch/me/if/you/can")
+@tracer.capture_method
+def catch_me_if_you_can():
+    return {"message": "oh hey"}
+
+@logger.inject_lambda_context(correlation_id_path=correlation_paths.API_GATEWAY_REST)
+@tracer.capture_lambda_handler
+def lambda_handler(event, context):
+    return app.resolve(event, context)
+```
+
+
+### Exception handling
+
+You can use **`exception_handler`** decorator with any Python exception. This allows you to handle a common exception outside your route, for example validation errors.
+
+```python hl_lines="10 15" title="Exception handling"
+from aws_lambda_powertools import Logger, Tracer
+from aws_lambda_powertools.logging import correlation_paths
+from aws_lambda_powertools.event_handler import content_types
+from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver, Response
+
+tracer = Tracer()
+logger = Logger()
+app = ApiGatewayResolver()
+
+@app.exception_handler(ValueError)
+def handle_value_error(ex: ValueError):
+    metadata = {"path": app.current_event.path}
+    logger.error(f"Malformed request: {ex}", extra=metadata)
+
+    return Response(
+        status_code=400,
+        content_type=content_types.TEXT_PLAIN,
+        body="Invalid request",
+    )

-    @app.get(rule="/bad-request-error")
-    def bad_request_error():
-        # HTTP 400
-        raise BadRequestError("Missing required parameter")
-
-    @app.get(rule="/unauthorized-error")
-    def unauthorized_error():
-        # HTTP 401
-        raise UnauthorizedError("Unauthorized")
-
-    @app.get(rule="/not-found-error")
-    def not_found_error():
-        # HTTP 404
-        raise NotFoundError
-
-    @app.get(rule="/internal-server-error")
-    def internal_server_error():
-        # HTTP 500
-        raise InternalServerError("Internal server error")
-
-    @app.get(rule="/service-error", cors=True)
-    def service_error():
-        raise ServiceError(502, "Something went wrong!")
-        # alternatively
-        # from http import HTTPStatus
-        # raise ServiceError(HTTPStatus.BAD_GATEWAY.value, "Something went wrong)
-
-    def handler(event, context):
-        return app.resolve(event, context)
-    ```
+
+@app.get("/hello")
+@tracer.capture_method
+def hello_name():
+    name = app.current_event.get_query_string_value(name="name")
+    if name is None:
+        raise ValueError("name query string must be present")
+    return {"message": f"hello {name}"}
+
+@logger.inject_lambda_context(correlation_id_path=correlation_paths.API_GATEWAY_REST)
+@tracer.capture_lambda_handler
+def lambda_handler(event, context):
+    return app.resolve(event, context)
+```
+
+
+### Raising HTTP errors
+
+You can easily raise any HTTP Error back to the client using `ServiceError` exception.
+
+???+ info
+    If you need to send custom headers, use [Response](#fine-grained-responses) class instead.
+
+Additionally, we provide pre-defined errors for the most popular ones such as HTTP 400, 401, 404, 500.
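To see what a client receives when one of these exceptions is raised, you can exercise the resolver in a unit test. This is only a sketch: it assumes the example that follows is saved as `app.py`, and it uses a hand-rolled minimal event and context instead of a full API Gateway payload:

```python title="Asserting the response of a pre-defined HTTP error (sketch)"
from dataclasses import dataclass

import app  # assumption: the resolver example below lives in app.py


@dataclass
class LambdaContext:
    function_name: str = "test"
    memory_limit_in_mb: int = 128
    invoked_function_arn: str = "arn:aws:lambda:eu-west-1:123456789012:function:test"
    aws_request_id: str = "da658bd3-2d6f-4e7b-8ec2-937234644fdc"


def test_bad_request_error():
    minimal_event = {
        "path": "/bad-request-error",
        "httpMethod": "GET",
        "requestContext": {"requestId": "227b78aa-779d-47d4-a48e-ce62120393b8"},
    }

    result = app.handler(minimal_event, LambdaContext())

    # BadRequestError is translated into an HTTP 400 response
    assert result["statusCode"] == 400
    assert "Missing required parameter" in result["body"]
```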
+ +```python hl_lines="4-10 20 25 30 35 39" title="Raising common HTTP Status errors (4xx, 5xx)" +from aws_lambda_powertools import Logger, Tracer +from aws_lambda_powertools.logging import correlation_paths +from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver +from aws_lambda_powertools.event_handler.exceptions import ( + BadRequestError, + InternalServerError, + NotFoundError, + ServiceError, + UnauthorizedError, +) + +tracer = Tracer() +logger = Logger() + +app = ApiGatewayResolver() + +@app.get(rule="/bad-request-error") +def bad_request_error(): + # HTTP 400 + raise BadRequestError("Missing required parameter") + +@app.get(rule="/unauthorized-error") +def unauthorized_error(): + # HTTP 401 + raise UnauthorizedError("Unauthorized") + +@app.get(rule="/not-found-error") +def not_found_error(): + # HTTP 404 + raise NotFoundError + +@app.get(rule="/internal-server-error") +def internal_server_error(): + # HTTP 500 + raise InternalServerError("Internal server error") + +@app.get(rule="/service-error", cors=True) +def service_error(): + raise ServiceError(502, "Something went wrong!") + # alternatively + # from http import HTTPStatus + # raise ServiceError(HTTPStatus.BAD_GATEWAY.value, "Something went wrong) + +def handler(event, context): + return app.resolve(event, context) +``` ### Custom Domain API Mappings @@ -577,7 +652,10 @@ This will lead to a HTTP 404 despite having your Lambda configured correctly. Se } ``` -Note: After removing a path prefix with `strip_prefixes`, the new root path will automatically be mapped to the path argument of `/`. For example, when using `strip_prefixes` value of `/pay`, there is no difference between a request path of `/pay` and `/pay/`; and the path argument would be defined as `/`. +???+ note + After removing a path prefix with `strip_prefixes`, the new root path will automatically be mapped to the path argument of `/`. + + For example, when using `strip_prefixes` value of `/pay`, there is no difference between a request path of `/pay` and `/pay/`; and the path argument would be defined as `/`. ## Advanced @@ -645,7 +723,8 @@ This will ensure that CORS headers are always returned as part of the response w } ``` -!!! tip "Optionally disable class on a per path basis with `cors=False` parameter" +???+ tip + Optionally disable CORS on a per path basis with `cors=False` parameter. #### Pre-flight @@ -657,7 +736,8 @@ For convenience, we automatically handle that for you as long as you [setup CORS For convenience, these are the default values when using `CORSConfig` to enable CORS: -!!! warning "Always configure `allow_origin` when using in production" +???+ warning + Always configure `allow_origin` when using in production. Key | Value | Note ------------------------------------------------- | --------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- @@ -710,7 +790,8 @@ You can use the `Response` class to have full control over the response, for exa You can compress with gzip and base64 encode your responses via `compress` parameter. -!!! warning "The client must send the `Accept-Encoding` header, otherwise a normal response will be sent" +???+ warning + The client must send the `Accept-Encoding` header, otherwise a normal response will be sent. === "app.py" @@ -760,7 +841,8 @@ For convenience, we automatically base64 encode binary responses. 
You can also u Like `compress` feature, the client must send the `Accept` header with the correct media type. -!!! warning "This feature requires API Gateway to configure binary media types, see [our sample infrastructure](#required-resources) for reference" +???+ warning + This feature requires API Gateway to configure binary media types, see [our sample infrastructure](#required-resources) for reference. === "app.py" @@ -855,66 +937,64 @@ You can enable debug mode via `debug` param, or via `POWERTOOLS_EVENT_HANDLER_DE This will enable full tracebacks errors in the response, print request and responses, and set CORS in development mode. -!!! warning "This might reveal sensitive information in your logs and relax CORS restrictions, use it sparingly." +???+ danger + This might reveal sensitive information in your logs and relax CORS restrictions, use it sparingly. -=== "debug.py" +```python hl_lines="3" title="Enabling debug mode" +from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver - ```python hl_lines="3" - from aws_lambda_powertools.event_handler.api_gateway import ApiGatewayResolver +app = ApiGatewayResolver(debug=True) - app = ApiGatewayResolver(debug=True) +@app.get("/hello") +def get_hello_universe(): + return {"message": "hello universe"} - @app.get("/hello") - def get_hello_universe(): - return {"message": "hello universe"} - - def lambda_handler(event, context): - return app.resolve(event, context) - ``` +def lambda_handler(event, context): + return app.resolve(event, context) +``` ### Custom serializer You can instruct API Gateway handler to use a custom serializer to best suit your needs, for example take into account Enums when serializing. -=== "custom_serializer.py" - ```python hl_lines="19-20 24" - import json - from enum import Enum - from json import JSONEncoder - from typing import Dict - - class CustomEncoder(JSONEncoder): - """Your customer json encoder""" - def default(self, obj): - if isinstance(obj, Enum): - return obj.value - try: - iterable = iter(obj) - except TypeError: - pass - else: - return sorted(iterable) - return JSONEncoder.default(self, obj) - - def custom_serializer(obj) -> str: - """Your custom serializer function ApiGatewayResolver will use""" - return json.dumps(obj, cls=CustomEncoder) - - # Assigning your custom serializer - app = ApiGatewayResolver(serializer=custom_serializer) - - class Color(Enum): - RED = 1 - BLUE = 2 - - @app.get("/colors") - def get_color() -> Dict: - return { - # Color.RED will be serialized to 1 as expected now - "color": Color.RED, - "variations": {"light", "dark"}, - } - ``` +```python hl_lines="19-20 24" title="Using a custom JSON serializer for responses" +import json +from enum import Enum +from json import JSONEncoder +from typing import Dict + +class CustomEncoder(JSONEncoder): + """Your customer json encoder""" + def default(self, obj): + if isinstance(obj, Enum): + return obj.value + try: + iterable = iter(obj) + except TypeError: + pass + else: + return sorted(iterable) + return JSONEncoder.default(self, obj) + +def custom_serializer(obj) -> str: + """Your custom serializer function ApiGatewayResolver will use""" + return json.dumps(obj, cls=CustomEncoder) + +# Assigning your custom serializer +app = ApiGatewayResolver(serializer=custom_serializer) + +class Color(Enum): + RED = 1 + BLUE = 2 + +@app.get("/colors") +def get_color() -> Dict: + return { + # Color.RED will be serialized to 1 as expected now + "color": Color.RED, + "variations": {"light", "dark"}, + } +``` ### Split routes with 
Router @@ -1189,7 +1269,8 @@ Event Handler naturally leads to a single Lambda function handling multiple rout Both single (monolithic) and multiple functions (micro) offer different set of trade-offs worth knowing. -!!! tip "TL;DR. Start with a monolithic function, add additional functions with new handlers, and possibly break into micro functions if necessary." +???+ tip + TL;DR. Start with a monolithic function, add additional functions with new handlers, and possibly break into micro functions if necessary. #### Monolithic function @@ -1197,13 +1278,13 @@ Both single (monolithic) and multiple functions (micro) offer different set of t A monolithic function means that your final code artifact will be deployed to a single function. This is generally the best approach to start. -**Benefits** +_**Benefits**_ * **Code reuse**. It's easier to reason about your service, modularize it and reuse code as it grows. Eventually, it can be turned into a standalone library. * **No custom tooling**. Monolithic functions are treated just like normal Python packages; no upfront investment in tooling. * **Faster deployment and debugging**. Whether you use all-at-once, linear, or canary deployments, a monolithic function is a single deployable unit. IDEs like PyCharm and VSCode have tooling to quickly profile, visualize, and step through debug any Python package. -**Downsides** +_**Downsides**_ * **Cold starts**. Frequent deployments and/or high load can diminish the benefit of monolithic functions depending on your latency requirements, due to [Lambda scaling model](https://docs.aws.amazon.com/lambda/latest/dg/invocation-scaling.html){target="_blank"}. Always load test to pragmatically balance between your customer experience and development cognitive load. * **Granular security permissions**. The micro function approach enables you to use fine-grained permissions & access controls, separate external dependencies & code signing at the function level. Conversely, you could have multiple functions while duplicating the final code artifact in a monolithic approach. @@ -1216,13 +1297,13 @@ A monolithic function means that your final code artifact will be deployed to a A micro function means that your final code artifact will be different to each function deployed. This is generally the approach to start if you're looking for fine-grain control and/or high load on certain parts of your service. -**Benefits** +_**Benefits**_ * **Granular scaling**. A micro function can benefit from the [Lambda scaling model](https://docs.aws.amazon.com/lambda/latest/dg/invocation-scaling.html){target="_blank"} to scale differently depending on each part of your application. Concurrency controls and provisioned concurrency can also be used at a granular level for capacity management. * **Discoverability**. Micro functions are easier do visualize when using distributed tracing. Their high-level architectures can be self-explanatory, and complexity is highly visible — assuming each function is named to the business purpose it serves. * **Package size**. An independent function can be significant smaller (KB vs MB) depending on external dependencies it require to perform its purpose. Conversely, a monolithic approach can benefit from [Lambda Layers](https://docs.aws.amazon.com/lambda/latest/dg/invocation-layers.html){target="_blank"} to optimize builds for external dependencies. -**Downsides** +_**Downsides**_ * **Upfront investment**. 
Python ecosystem doesn't use a bundler — you need a custom build tooling to ensure each function only has what it needs and account for [C bindings for runtime compatibility](https://docs.aws.amazon.com/lambda/latest/dg/lambda-runtimes.html){target="_blank"}. Operations become more elaborate — you need to standardize tracing labels/annotations, structured logging, and metrics to pinpoint root causes. - Engineering discipline is necessary for both approaches. Micro-function approach however requires further attention in consistency as the number of functions grow, just like any distributed system. diff --git a/docs/core/event_handler/appsync.md b/docs/core/event_handler/appsync.md index 7cf99fa8ce2..261440004cc 100644 --- a/docs/core/event_handler/appsync.md +++ b/docs/core/event_handler/appsync.md @@ -27,10 +27,10 @@ You must have an existing AppSync GraphQL API and IAM permissions to invoke your This is the sample infrastructure we are using for the initial examples with a AppSync Direct Lambda Resolver. -=== "schema.graphql" +???+ tip "Tip: Designing GraphQL Schemas for the first time?" + Visit [AWS AppSync schema documentation](https://docs.aws.amazon.com/appsync/latest/devguide/designing-your-schema.html){target="_blank"} for understanding how to define types, nesting, and pagination. - !!! tip "Designing GraphQL Schemas for the first time?" - Visit [AWS AppSync schema documentation](https://docs.aws.amazon.com/appsync/latest/devguide/designing-your-schema.html){target="_blank"} for understanding how to define types, nesting, and pagination. +=== "schema.graphql" ```typescript --8<-- "docs/shared/getting_started_schema.graphql" @@ -176,7 +176,8 @@ You can define your functions to match GraphQL types and fields with the `app.re Here's an example where we have two separate functions to resolve `getTodo` and `listTodos` fields within the `Query` type. For completion, we use Scalar type utilities to generate the right output based on our schema definition. -!!! info "GraphQL arguments are passed as function arguments" +???+ info + GraphQL arguments are passed as function arguments. === "app.py" @@ -395,30 +396,28 @@ You can nest `app.resolver()` decorator multiple times when resolving fields wit For Lambda Python3.8+ runtime, this utility supports async functions when you use in conjunction with `asyncio.run`. 
-=== "async_resolver.py" - - ```python hl_lines="4 8 10-12 20" - from aws_lambda_powertools import Logger, Tracer +```python hl_lines="4 8 10-12 20" title="Resolving GraphQL resolvers async" +from aws_lambda_powertools import Logger, Tracer - from aws_lambda_powertools.logging import correlation_paths - from aws_lambda_powertools.event_handler import AppSyncResolver +from aws_lambda_powertools.logging import correlation_paths +from aws_lambda_powertools.event_handler import AppSyncResolver - tracer = Tracer(service="sample_resolver") - logger = Logger(service="sample_resolver") - app = AppSyncResolver() +tracer = Tracer(service="sample_resolver") +logger = Logger(service="sample_resolver") +app = AppSyncResolver() - @app.resolver(type_name="Query", field_name="listTodos") - async def list_todos(): - todos = await some_async_io_call() - return todos +@app.resolver(type_name="Query", field_name="listTodos") +async def list_todos(): + todos = await some_async_io_call() + return todos - @logger.inject_lambda_context(correlation_id_path=correlation_paths.APPSYNC_RESOLVER) - @tracer.capture_lambda_handler - def lambda_handler(event, context): - result = app.resolve(event, context) +@logger.inject_lambda_context(correlation_id_path=correlation_paths.APPSYNC_RESOLVER) +@tracer.capture_lambda_handler +def lambda_handler(event, context): + result = app.resolve(event, context) - return asyncio.run(result) - ``` + return asyncio.run(result) +``` ### Amplify GraphQL Transformer @@ -426,38 +425,36 @@ Assuming you have [Amplify CLI installed](https://docs.amplify.aws/cli/start/ins -=== "schema.graphql" - - ```typescript hl_lines="7 15 20 22" - @model - type Merchant { - id: String! - name: String! - description: String - # Resolves to `common_field` - commonField: String @function(name: "merchantInfo-${env}") - } - - type Location { - id: ID! - name: String! - address: String - # Resolves to `common_field` - commonField: String @function(name: "merchantInfo-${env}") - } - - type Query { - # List of locations resolves to `list_locations` - listLocations(page: Int, size: Int): [Location] @function(name: "merchantInfo-${env}") - # List of locations resolves to `list_locations` - findMerchant(search: str): [Merchant] @function(name: "searchMerchant-${env}") - } - ``` +```typescript hl_lines="7 15 20 22" title="Example GraphQL Schema" +@model +type Merchant { + id: String! + name: String! + description: String + # Resolves to `common_field` + commonField: String @function(name: "merchantInfo-${env}") +} + +type Location { + id: ID! + name: String! + address: String + # Resolves to `common_field` + commonField: String @function(name: "merchantInfo-${env}") +} + +type Query { + # List of locations resolves to `list_locations` + listLocations(page: Int, size: Int): [Location] @function(name: "merchantInfo-${env}") + # List of locations resolves to `list_locations` + findMerchant(search: str): [Merchant] @function(name: "searchMerchant-${env}") +} +``` [Create two new basic Python functions](https://docs.amplify.aws/cli/function#set-up-a-function){target="_blank"} via `amplify add function`. -!!! note "Amplify CLI generated functions use `Pipenv` as a dependency manager" - Your function source code is located at **`amplify/backend/function/your-function-name`**. +???+ note + Amplify CLI generated functions use `Pipenv` as a dependency manager. Your function source code is located at **`amplify/backend/function/your-function-name`**. 
Within your function's folder, add Lambda Powertools as a dependency with `pipenv install aws-lambda-powertools`. @@ -713,7 +710,8 @@ You can subclass `AppSyncResolverEvent` to bring your own set of methods to hand ### Split operations with Router -!!! tip "Read the **[considerations section for trade-offs between monolithic and micro functions](./api_gateway.md#considerations){target="_blank"}**, as it's also applicable here." +???+ tip + Read the **[considerations section for trade-offs between monolithic and micro functions](./api_gateway.md#considerations){target="_blank"}**, as it's also applicable here. As you grow the number of related GraphQL operations a given Lambda function should handle, it is natural to split them into separate files to ease maintenance - That's where the `Router` feature is useful. diff --git a/docs/core/logger.md b/docs/core/logger.md index 833d5a5c721..6de24577b66 100644 --- a/docs/core/logger.md +++ b/docs/core/logger.md @@ -21,28 +21,29 @@ Setting | Description | Environment variable | Constructor parameter **Logging level** | Sets how verbose Logger should be (INFO, by default) | `LOG_LEVEL` | `level` **Service** | Sets **service** key that will be present across all log statements | `POWERTOOLS_SERVICE_NAME` | `service` -> Example using AWS Serverless Application Model (SAM) +???+ example + **AWS Serverless Application Model (SAM)** === "template.yaml" - ```yaml hl_lines="9 10" - Resources: - HelloWorldFunction: - Type: AWS::Serverless::Function - Properties: - Runtime: python3.8 - Environment: - Variables: - LOG_LEVEL: INFO - POWERTOOLS_SERVICE_NAME: example - ``` + ```yaml hl_lines="9 10" + Resources: + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Runtime: python3.8 + Environment: + Variables: + LOG_LEVEL: INFO + POWERTOOLS_SERVICE_NAME: example + ``` === "app.py" - ```python hl_lines="2 4" - from aws_lambda_powertools import Logger - logger = Logger() # Sets service via env var - # OR logger = Logger(service="example") - ``` + ```python hl_lines="2 4" + from aws_lambda_powertools import Logger + logger = Logger() # Sets service via env var + # OR logger = Logger(service="example") + ``` ### Standard structured keys @@ -129,26 +130,25 @@ Key | Example When debugging in non-production environments, you can instruct Logger to log the incoming event with `log_event` param or via `POWERTOOLS_LOGGER_LOG_EVENT` env var. -!!! warning - This is disabled by default to prevent sensitive info being logged. - -=== "log_handler_event.py" +???+ warning + This is disabled by default to prevent sensitive info being logged - ```python hl_lines="5" - from aws_lambda_powertools import Logger +```python hl_lines="5" title="Logging incoming event" +from aws_lambda_powertools import Logger - logger = Logger(service="payment") +logger = Logger(service="payment") - @logger.inject_lambda_context(log_event=True) - def handler(event, context): - ... - ``` +@logger.inject_lambda_context(log_event=True) +def handler(event, context): + ... +``` #### Setting a Correlation ID You can set a Correlation ID using `correlation_id_path` param by passing a [JMESPath expression](https://jmespath.org/tutorial.html){target="_blank"}. -!!! tip "You can retrieve correlation IDs via `get_correlation_id` method" +???+ tip + You can retrieve correlation IDs via `get_correlation_id` method === "collect.py" @@ -237,7 +237,7 @@ We provide [built-in JMESPath expressions](#built-in-correlation-id-expressions) ### Appending additional keys -!!! 
info "Custom keys are persisted across warm invocations" +???+ info "Info: Custom keys are persisted across warm invocations" Always set additional keys as part of your handler to ensure they have the latest value, or explicitly clear them with [`clear_state=True`](#clearing-all-state). You can append additional keys using either mechanism: @@ -247,7 +247,8 @@ You can append additional keys using either mechanism: #### append_keys method -> NOTE: `append_keys` replaces `structure_logs(append=True, **kwargs)` method. Both will continue to work until the next major version. +???+ note + `append_keys` replaces `structure_logs(append=True, **kwargs)` method. structure_logs will be removed in v2. You can append your own keys to your existing Logger via `append_keys(**additional_key_values)` method. @@ -279,7 +280,7 @@ You can append your own keys to your existing Logger via `append_keys(**addition } ``` -!!! tip "Logger will automatically reject any key with a None value" +???+ tip "Tip: Logger will automatically reject any key with a None value" If you conditionally add keys depending on the payload, you can follow the example above. This example will add `order_id` if its value is not empty, and in subsequent invocations where `order_id` might not be present it'll remove it from the Logger. @@ -290,7 +291,8 @@ Extra parameter is available for all log levels' methods, as implemented in the It accepts any dictionary, and all keyword arguments will be added as part of the root structure of the logs for that log statement. -!!! info "Any keyword argument added using `extra` will not be persisted for subsequent messages." +???+ info + Any keyword argument added using `extra` will not be persisted for subsequent messages. === "extra_parameter.py" @@ -436,10 +438,10 @@ You can remove any additional key from Logger state using `remove_keys`. Logger is commonly initialized in the global scope. Due to [Lambda Execution Context reuse](https://docs.aws.amazon.com/lambda/latest/dg/runtimes-context.html), this means that custom keys can be persisted across invocations. If you want all custom keys to be deleted, you can use `clear_state=True` param in `inject_lambda_context` decorator. -!!! info - This is useful when you add multiple custom keys conditionally, instead of setting a default `None` value if not present. Any key with `None` value is automatically removed by Logger. +???+ tip "Tip: When is this useful?" + It is useful when you add multiple custom keys conditionally, instead of setting a default `None` value if not present. Any key with `None` value is automatically removed by Logger. -!!! danger "This can have unintended side effects if you use Layers" +???+ danger "Danger: This can have unintended side effects if you use Layers" Lambda Layers code is imported before the Lambda handler. This means that `clear_state=True` will instruct Logger to remove any keys previously added before Lambda handler execution proceeds. @@ -502,7 +504,7 @@ Logger is commonly initialized in the global scope. Due to [Lambda Execution Con Use `logger.exception` method to log contextual information about exceptions. Logger will include `exception_name` and `exception` keys to aid troubleshooting and error enumeration. -!!! tip +???+ tip You can use your preferred Log Analytics tool to enumerate and visualize exceptions across all your services using `exception_name` key. === "collect.py" @@ -534,6 +536,21 @@ Use `logger.exception` method to log contextual information about exceptions. 
Lo ## Advanced +### Built-in Correlation ID expressions + +You can use any of the following built-in JMESPath expressions as part of [inject_lambda_context decorator](#setting-a-correlation-id). + +???+ note "Note: Any object key named with `-` must be escaped" + For example, **`request.headers."x-amzn-trace-id"`**. + +Name | Expression | Description +------------------------------------------------- | ------------------------------------------------- | --------------------------------------------------------------------------------- +**API_GATEWAY_REST** | `"requestContext.requestId"` | API Gateway REST API request ID +**API_GATEWAY_HTTP** | `"requestContext.requestId"` | API Gateway HTTP API request ID +**APPSYNC_RESOLVER** | `'request.headers."x-amzn-trace-id"'` | AppSync X-Ray Trace ID +**APPLICATION_LOAD_BALANCER** | `'headers."x-amzn-trace-id"'` | ALB X-Ray Trace ID +**EVENT_BRIDGE** | `"id"` | EventBridge Event ID + ### Reusing Logger across your code Logger supports inheritance via `child` parameter. This allows you to create multiple Loggers across your code base, and propagate changes such as new keys to all Loggers. @@ -564,7 +581,7 @@ Logger supports inheritance via `child` parameter. This allows you to create mul In this example, `Logger` will create a parent logger named `payment` and a child logger named `payment.shared`. Changes in either parent or child logger will be propagated bi-directionally. -!!! info "Child loggers will be named after the following convention `{service}.{filename}`" +???+ info "Info: Child loggers will be named after the following convention `{service}.{filename}`" If you forget to use `child` param but the `service` name is the same of the parent, we will return the existing parent `Logger` instead. ### Sampling debug logs @@ -573,15 +590,15 @@ Use sampling when you want to dynamically change your log level to **DEBUG** bas You can use values ranging from `0.0` to `1` (100%) when setting `POWERTOOLS_LOGGER_SAMPLE_RATE` env var or `sample_rate` parameter in Logger. -!!! tip "When is this useful?" +???+ tip "Tip: When is this useful?" Let's imagine a sudden spike increase in concurrency triggered a transient issue downstream. When looking into the logs you might not have enough information, and while you can adjust log levels it might not happen again. This feature takes into account transient issues where additional debugging information can be useful. Sampling decision happens at the Logger initialization. This means sampling may happen significantly more or less than depending on your traffic patterns, for example a steady low number of invocations and thus few cold starts. -!!! note - If you want Logger to calculate sampling upon every invocation, please open a [feature request](https://github.com/awslabs/aws-lambda-powertools-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=). 
+???+ note + Open a [feature request](https://github.com/awslabs/aws-lambda-powertools-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=) if you want Logger to calculate sampling for every invocation === "collect.py" @@ -643,15 +660,13 @@ Parameter | Description | Default **`log_record_order`** | set order of log keys when logging | `["level", "location", "message", "timestamp"]` **`kwargs`** | key-value to be included in log messages | `None` -=== "LambdaPowertoolsFormatter.py" +```python hl_lines="2 4-5" title="Pre-configuring Lambda Powertools Formatter" +from aws_lambda_powertools import Logger +from aws_lambda_powertools.logging.formatter import LambdaPowertoolsFormatter - ```python hl_lines="2 4-5" - from aws_lambda_powertools import Logger - from aws_lambda_powertools.logging.formatter import LambdaPowertoolsFormatter - - formatter = LambdaPowertoolsFormatter(utc=True, log_record_order=["message"]) - logger = Logger(service="example", logger_formatter=formatter) - ``` +formatter = LambdaPowertoolsFormatter(utc=True, log_record_order=["message"]) +logger = Logger(service="example", logger_formatter=formatter) +``` ### Migrating from other Loggers @@ -671,7 +686,7 @@ For inheritance, Logger uses a `child=True` parameter along with `service` being For child Loggers, we introspect the name of your module where `Logger(child=True, service="name")` is called, and we name your Logger as **{service}.{filename}**. -!!! danger +???+ danger A common issue when migrating from other Loggers is that `service` might be defined in the parent Logger (no child param), and not defined in the child Logger: === "incorrect_logger_inheritance.py" @@ -706,7 +721,7 @@ For child Loggers, we introspect the name of your module where `Logger(child=Tru In this case, Logger will register a Logger named `payment`, and a Logger named `service_undefined`. The latter isn't inheriting from the parent, and will have no handler, resulting in no message being logged to standard output. -!!! tip +???+ tip This can be fixed by either ensuring both has the `service` value as `payment`, or simply use the environment variable `POWERTOOLS_SERVICE_NAME` to ensure service value will be the same across all Loggers when not explicitly set. #### Overriding Log records @@ -779,17 +794,15 @@ You can change the order of [standard Logger keys](#standard-structured-keys) or By default, this Logger and standard logging library emits records using local time timestamp. You can override this behaviour via `utc` parameter: -=== "app.py" +```python hl_lines="6" title="Setting UTC timestamp by default" +from aws_lambda_powertools import Logger - ```python hl_lines="6" - from aws_lambda_powertools import Logger +logger = Logger(service="payment") +logger.info("Local time") - logger = Logger(service="payment") - logger.info("Local time") - - logger_in_utc = Logger(service="payment", utc=True) - logger_in_utc.info("GMT time zone") - ``` +logger_in_utc = Logger(service="payment", utc=True) +logger_in_utc.info("GMT time zone") +``` #### Custom function for unserializable values @@ -827,20 +840,18 @@ By default, Logger uses `str` to handle values non-serializable by JSON. You can By default, Logger uses StreamHandler and logs to standard output. 
You can override this behaviour via `logger_handler` parameter: -=== "collect.py" - - ```python hl_lines="3-4 9 12" - import logging - from pathlib import Path +```python hl_lines="3-4 9 12" title="Configure Logger to output to a file" +import logging +from pathlib import Path - from aws_lambda_powertools import Logger +from aws_lambda_powertools import Logger - log_file = Path("/tmp/log.json") - log_file_handler = logging.FileHandler(filename=log_file) - logger = Logger(service="payment", logger_handler=log_file_handler) +log_file = Path("/tmp/log.json") +log_file_handler = logging.FileHandler(filename=log_file) +logger = Logger(service="payment", logger_handler=log_file_handler) - logger.info("Collecting payment") - ``` +logger.info("Collecting payment") +``` #### Bring your own formatter @@ -850,7 +861,7 @@ For **minor changes like remapping keys** after all log record processing has co === "custom_formatter.py" - ```python + ```python hl_lines="6-7 12" from aws_lambda_powertools import Logger from aws_lambda_powertools.logging.formatter import LambdaPowertoolsFormatter @@ -862,15 +873,24 @@ For **minor changes like remapping keys** after all log record processing has co log["event"] = log.pop("message") # rename message key to event return self.json_serializer(log) # use configured json serializer - my_formatter = CustomFormatter() - logger = Logger(service="example", logger_formatter=my_formatter) + logger = Logger(service="example", logger_formatter=CustomFormatter()) logger.info("hello") ``` +=== "Example CloudWatch Logs excerpt" + ```json hl_lines="5" + { + "level": "INFO", + "location": ":16", + "timestamp": "2021-12-30 13:41:53,413+0100", + "event": "hello" + } + ``` + For **replacing the formatter entirely**, you can subclass `BasePowertoolsFormatter`, implement `append_keys` method, and override `format` standard logging method. This ensures the current feature set of Logger like [injecting Lambda context](#capturing-lambda-context-info) and [sampling](#sampling-debug-logs) will continue to work. -!!! info - You might need to implement `remove_keys` method if you make use of the feature too. +???+ info + You might need to implement `remove_keys` method if you make use of the feature too. === "collect.py" @@ -928,39 +948,22 @@ By default, Logger uses `json.dumps` and `json.loads` as serializer and deserial As parameters don't always translate well between them, you can pass any callable that receives a `Dict` and return a `str`: -=== "collect.py" - - ```python hl_lines="1 5-6 9-10" - import orjson - - from aws_lambda_powertools import Logger - - custom_serializer = orjson.dumps - custom_deserializer = orjson.loads +```python hl_lines="1 5-6 9-10" title="Using Rust orjson library as serializer" +import orjson - logger = Logger(service="payment", - json_serializer=custom_serializer, - json_deserializer=custom_deserializer - ) +from aws_lambda_powertools import Logger - # when using parameters, you can pass a partial - # custom_serializer=functools.partial(orjson.dumps, option=orjson.OPT_SERIALIZE_NUMPY) - ``` +custom_serializer = orjson.dumps +custom_deserializer = orjson.loads -## Built-in Correlation ID expressions +logger = Logger(service="payment", + json_serializer=custom_serializer, + json_deserializer=custom_deserializer +) -You can use any of the following built-in JMESPath expressions as part of [inject_lambda_context decorator](#setting-a-correlation-id). - -!!! 
note "Escaping necessary for the `-` character" - Any object key named with `-` must be escaped, for example **`request.headers."x-amzn-trace-id"`**. - -Name | Expression | Description -------------------------------------------------- | ------------------------------------------------- | --------------------------------------------------------------------------------- -**API_GATEWAY_REST** | `"requestContext.requestId"` | API Gateway REST API request ID -**API_GATEWAY_HTTP** | `"requestContext.requestId"` | API Gateway HTTP API request ID -**APPSYNC_RESOLVER** | `'request.headers."x-amzn-trace-id"'` | AppSync X-Ray Trace ID -**APPLICATION_LOAD_BALANCER** | `'headers."x-amzn-trace-id"'` | ALB X-Ray Trace ID -**EVENT_BRIDGE** | `"id"` | EventBridge Event ID +# when using parameters, you can pass a partial +# custom_serializer=functools.partial(orjson.dumps, option=orjson.OPT_SERIALIZE_NUMPY) +``` ## Testing your code @@ -1018,20 +1021,18 @@ This is a Pytest sample that provides the minimum information necessary for Logg your_lambda_handler(test_event, lambda_context) ``` -!!! tip - If you're using pytest and are looking to assert plain log messages, do check out the built-in [caplog fixture](https://docs.pytest.org/en/latest/how-to/logging.html){target="_blank"}. +???+ tip + Check out the built-in [Pytest caplog fixture](https://docs.pytest.org/en/latest/how-to/logging.html){target="_blank"} to assert plain log messages ### Pytest live log feature Pytest Live Log feature duplicates emitted log messages in order to style log statements according to their levels, for this to work use `POWERTOOLS_LOG_DEDUPLICATION_DISABLED` env var. -=== "shell" - - ```bash - POWERTOOLS_LOG_DEDUPLICATION_DISABLED="1" pytest -o log_cli=1 - ``` +```bash title="Disabling log deduplication to use Pytest live log" +POWERTOOLS_LOG_DEDUPLICATION_DISABLED="1" pytest -o log_cli=1 +``` -!!! warning +???+ warning This feature should be used with care, as it explicitly disables our ability to filter propagated messages to the root logger (if configured). ## FAQ @@ -1041,26 +1042,24 @@ Pytest Live Log feature duplicates emitted log messages in order to style log st You can enable the `botocore` and `boto3` logs by using the `set_stream_logger` method, this method will add a stream handler for the given name and level to the logging module. By default, this logs all boto3 messages to stdout. -=== "log_botocore_and_boto3.py" +```python hl_lines="6-7" title="Enabling AWS SDK logging" +from typing import Dict, List +from aws_lambda_powertools.utilities.typing import LambdaContext +from aws_lambda_powertools import Logger - ```python hl_lines="6-7" - from typing import Dict, List - from aws_lambda_powertools.utilities.typing import LambdaContext - from aws_lambda_powertools import Logger +import boto3 +boto3.set_stream_logger() +boto3.set_stream_logger('botocore') - import boto3 - boto3.set_stream_logger() - boto3.set_stream_logger('botocore') +logger = Logger() +client = boto3.client('s3') - logger = Logger() - client = boto3.client('s3') +def handler(event: Dict, context: LambdaContext) -> List: + response = client.list_buckets() - def handler(event: Dict, context: LambdaContext) -> List: - response = client.list_buckets() - - return response.get("Buckets", []) - ``` + return response.get("Buckets", []) +``` **What's the difference between `append_keys` and `extra`?** @@ -1112,6 +1111,4 @@ Here's an example where we persist `payment_id` not `request_id`. 
Note that `pay **How do I aggregate and search Powertools logs across accounts?** -As of now, ElasticSearch (ELK) or 3rd party solutions are best suited to this task. - -Please see this discussion for more information: https://github.com/awslabs/aws-lambda-powertools-python/issues/460 +As of now, ElasticSearch (ELK) or 3rd party solutions are best suited to this task. Please refer to this [discussion for more details](https://github.com/awslabs/aws-lambda-powertools-python/issues/460) diff --git a/docs/core/metrics.md b/docs/core/metrics.md index d4bd9a0727e..99ee17106b3 100644 --- a/docs/core/metrics.md +++ b/docs/core/metrics.md @@ -35,41 +35,45 @@ Setting | Description | Environment variable | Constructor parameter **Metric namespace** | Logical container where all metrics will be placed e.g. `ServerlessAirline` | `POWERTOOLS_METRICS_NAMESPACE` | `namespace` **Service** | Optionally, sets **service** metric dimension across all metrics e.g. `payment` | `POWERTOOLS_SERVICE_NAME` | `service` -!!! tip "Use your application or main service as the metric namespace to easily group all metrics" +???+ tip + Use your application or main service as the metric namespace to easily group all metrics. -> Example using AWS Serverless Application Model (SAM) +???+ example + **AWS Serverless Application Model (SAM)** === "template.yml" - ```yaml hl_lines="9 10" - Resources: - HelloWorldFunction: - Type: AWS::Serverless::Function - Properties: - Runtime: python3.8 - Environment: - Variables: - POWERTOOLS_SERVICE_NAME: payment - POWERTOOLS_METRICS_NAMESPACE: ServerlessAirline - ``` + ```yaml hl_lines="9 10" + Resources: + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Runtime: python3.8 + Environment: + Variables: + POWERTOOLS_SERVICE_NAME: payment + POWERTOOLS_METRICS_NAMESPACE: ServerlessAirline + ``` === "app.py" - ```python hl_lines="4 6" - from aws_lambda_powertools import Metrics - from aws_lambda_powertools.metrics import MetricUnit + ```python hl_lines="4 6" + from aws_lambda_powertools import Metrics + from aws_lambda_powertools.metrics import MetricUnit - metrics = Metrics() # Sets metric namespace and service via env var - # OR - metrics = Metrics(namespace="ServerlessAirline", service="orders") # Sets metric namespace, and service as a metric dimension - ``` + metrics = Metrics() # Sets metric namespace and service via env var + # OR + metrics = Metrics(namespace="ServerlessAirline", service="orders") # Sets metric namespace, and service as a metric dimension + ``` -You can initialize Metrics anywhere in your code - It'll keep track of your aggregate metrics in memory. ### Creating metrics You can create metrics using `add_metric`, and you can create dimensions for all your aggregate metrics using `add_dimension` method. +???+ tip + You can initialize Metrics in any other module too. It'll keep track of your aggregate metrics in memory to optimize costs (one blob instead of multiples). + === "Metrics" ```python hl_lines="8" @@ -96,13 +100,13 @@ You can create metrics using `add_metric`, and you can create dimensions for all metrics.add_metric(name="SuccessfulBooking", unit=MetricUnit.Count, value=1) ``` -!!! tip "Autocomplete Metric Units" +???+ tip "Tip: Autocomplete Metric Units" `MetricUnit` enum facilitate finding a supported metric unit by CloudWatch. Alternatively, you can pass the value as a string if you already know them e.g. "Count". -!!! 
note "Metrics overflow" +???+ note "Note: Metrics overflow" CloudWatch EMF supports a max of 100 metrics per batch. Metrics utility will flush all metrics when adding the 100th metric. Subsequent metrics, e.g. 101th, will be aggregated into a new EMF object, for your convenience. -!!! warning "Do not create metrics or dimensions outside the handler" +???+ warning "Warning: Do not create metrics or dimensions outside the handler" Metrics or dimensions added in the global scope will only be added during cold start. Disregard if you that's the intended behaviour. ### Adding default dimensions @@ -184,7 +188,7 @@ This decorator also **validates**, **serializes**, and **flushes** all your metr } ``` -!!! tip "Metric validation" +???+ tip "Tip: Metric validation" If metrics are provided, and any of the following criteria are not met, **`SchemaValidationError`** exception will be raised: * Maximum of 9 dimensions @@ -193,57 +197,51 @@ This decorator also **validates**, **serializes**, and **flushes** all your metr #### Raising SchemaValidationError on empty metrics -If you want to ensure that at least one metric is emitted, you can pass `raise_on_empty_metrics` to the **log_metrics** decorator: +If you want to ensure at least one metric is always emitted, you can pass `raise_on_empty_metrics` to the **log_metrics** decorator: -=== "app.py" +```python hl_lines="5" title="Raising SchemaValidationError exception if no metrics are added" +from aws_lambda_powertools.metrics import Metrics - ```python hl_lines="5" - from aws_lambda_powertools.metrics import Metrics +metrics = Metrics() - metrics = Metrics() +@metrics.log_metrics(raise_on_empty_metrics=True) +def lambda_handler(evt, ctx): + ... +``` - @metrics.log_metrics(raise_on_empty_metrics=True) - def lambda_handler(evt, ctx): - ... - ``` - -!!! tip "Suppressing warning messages on empty metrics" +???+ tip "Suppressing warning messages on empty metrics" If you expect your function to execute without publishing metrics every time, you can suppress the warning with **`warnings.filterwarnings("ignore", "No metrics to publish*")`**. #### Nesting multiple middlewares When using multiple middlewares, use `log_metrics` as your **last decorator** wrapping all subsequent ones to prevent early Metric validations when code hasn't been run yet. -=== "nested_middlewares.py" - - ```python hl_lines="7-8" - from aws_lambda_powertools import Metrics, Tracer - from aws_lambda_powertools.metrics import MetricUnit +```python hl_lines="7-8" title="Example with multiple decorators" +from aws_lambda_powertools import Metrics, Tracer +from aws_lambda_powertools.metrics import MetricUnit - tracer = Tracer(service="booking") - metrics = Metrics(namespace="ExampleApplication", service="booking") +tracer = Tracer(service="booking") +metrics = Metrics(namespace="ExampleApplication", service="booking") - @metrics.log_metrics - @tracer.capture_lambda_handler - def lambda_handler(evt, ctx): - metrics.add_metric(name="BookingConfirmation", unit=MetricUnit.Count, value=1) - ``` +@metrics.log_metrics +@tracer.capture_lambda_handler +def lambda_handler(evt, ctx): + metrics.add_metric(name="BookingConfirmation", unit=MetricUnit.Count, value=1) +``` ### Capturing cold start metric You can optionally capture cold start metrics with `log_metrics` decorator via `capture_cold_start_metric` param. 
-=== "app.py" +```python hl_lines="5" title="Generating function cold start metric" +from aws_lambda_powertools import Metrics - ```python hl_lines="5" - from aws_lambda_powertools import Metrics +metrics = Metrics(service="ExampleService") - metrics = Metrics(service="ExampleService") - - @metrics.log_metrics(capture_cold_start_metric=True) - def lambda_handler(evt, ctx): - ... - ``` +@metrics.log_metrics(capture_cold_start_metric=True) +def lambda_handler(evt, ctx): + ... +``` If it's a cold start invocation, this feature will: @@ -252,7 +250,8 @@ If it's a cold start invocation, this feature will: This has the advantage of keeping cold start metric separate from your application metrics, where you might have unrelated dimensions. -!!! info "We do not emit 0 as a value for ColdStart metric for cost reasons. [Let us know](https://github.com/awslabs/aws-lambda-powertools-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=) if you'd prefer a flag to override it" +???+ info + We do not emit 0 as a value for ColdStart metric for cost reasons. [Let us know](https://github.com/awslabs/aws-lambda-powertools-python/issues/new?assignees=&labels=feature-request%2C+triage&template=feature_request.md&title=) if you'd prefer a flag to override it. ## Advanced @@ -260,7 +259,7 @@ This has the advantage of keeping cold start metric separate from your applicati You can add high-cardinality data as part of your Metrics log with `add_metadata` method. This is useful when you want to search highly contextual information along with your metrics in your logs. -!!! info +???+ info **This will not be available during metrics visualization** - Use **dimensions** for this purpose === "app.py" @@ -310,90 +309,72 @@ You can add high-cardinality data as part of your Metrics log with `add_metadata CloudWatch EMF uses the same dimensions across all your metrics. Use `single_metric` if you have a metric that should have different dimensions. -!!! info +???+ info Generally, this would be an edge case since you [pay for unique metric](https://aws.amazon.com/cloudwatch/pricing). Keep the following formula in mind: **unique metric = (metric_name + dimension_name + dimension_value)** -=== "single_metric.py" - - ```python hl_lines="6-7" - from aws_lambda_powertools import single_metric - from aws_lambda_powertools.metrics import MetricUnit +```python hl_lines="6-7" title="Generating an EMF blob with a single metric" +from aws_lambda_powertools import single_metric +from aws_lambda_powertools.metrics import MetricUnit - def lambda_handler(evt, ctx): - with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="ExampleApplication") as metric: - metric.add_dimension(name="function_context", value="$LATEST") - ... - ``` +def lambda_handler(evt, ctx): + with single_metric(name="ColdStart", unit=MetricUnit.Count, value=1, namespace="ExampleApplication") as metric: + metric.add_dimension(name="function_context", value="$LATEST") + ... +``` ### Flushing metrics manually If you prefer not to use `log_metrics` because you might want to encapsulate additional logic when doing so, you can manually flush and clear metrics as follows: -!!! warning - Metrics, dimensions and namespace validation still applies. 
+???+ warning + Metrics, dimensions and namespace validation still applies -=== "manual_metric_serialization.py" +```python hl_lines="9-11" title="Manually flushing and clearing metrics from memory" +import json +from aws_lambda_powertools import Metrics +from aws_lambda_powertools.metrics import MetricUnit - ```python hl_lines="9-11" - import json - from aws_lambda_powertools import Metrics - from aws_lambda_powertools.metrics import MetricUnit +metrics = Metrics(namespace="ExampleApplication", service="booking") - metrics = Metrics(namespace="ExampleApplication", service="booking") - - def lambda_handler(evt, ctx): - metrics.add_metric(name="ColdStart", unit=MetricUnit.Count, value=1) - your_metrics_object = metrics.serialize_metric_set() - metrics.clear_metrics() - print(json.dumps(your_metrics_object)) - ``` +def lambda_handler(evt, ctx): + metrics.add_metric(name="ColdStart", unit=MetricUnit.Count, value=1) + your_metrics_object = metrics.serialize_metric_set() + metrics.clear_metrics() + print(json.dumps(your_metrics_object)) +``` ## Testing your code ### Environment variables -Use `POWERTOOLS_METRICS_NAMESPACE` and `POWERTOOLS_SERVICE_NAME` env vars when unit testing your code to ensure metric namespace and dimension objects are created, and your code doesn't fail validation. - -=== "shell" - - ```bash - POWERTOOLS_SERVICE_NAME="Example" POWERTOOLS_METRICS_NAMESPACE="Application" python -m pytest - ``` - -If you prefer setting environment variable for specific tests, and are using Pytest, you can use [monkeypatch](https://docs.pytest.org/en/latest/monkeypatch.html) fixture: - -=== "pytest_env_var.py" +???+ tip + Ignore this section, if you are explicitly setting namespace/default dimension via `namespace` and `service` parameters. - ```python - def test_namespace_env_var(monkeypatch): - # Set POWERTOOLS_METRICS_NAMESPACE before initializating Metrics - monkeypatch.setenv("POWERTOOLS_METRICS_NAMESPACE", namespace) + For example, `Metrics(namespace=ApplicationName, service=ServiceName)` - metrics = Metrics() - ... - ``` +Use `POWERTOOLS_METRICS_NAMESPACE` and `POWERTOOLS_SERVICE_NAME` env vars when unit testing your code to ensure metric namespace and dimension objects are created, and your code doesn't fail validation. -> Ignore this, if you are explicitly setting namespace/default dimension via `namespace` and `service` parameters: `metrics = Metrics(namespace=ApplicationName, service=ServiceName)` +```bash title="Injecting dummy Metric Namespace before running tests" +POWERTOOLS_SERVICE_NAME="Example" POWERTOOLS_METRICS_NAMESPACE="Application" python -m pytest +``` ### Clearing metrics `Metrics` keep metrics in memory across multiple instances. If you need to test this behaviour, you can use the following Pytest fixture to ensure metrics are reset incl. 
cold start: -=== "pytest_metrics_reset_fixture.py" - - ```python - @pytest.fixture(scope="function", autouse=True) - def reset_metric_set(): - # Clear out every metric data prior to every test - metrics = Metrics() - metrics.clear_metrics() - metrics_global.is_cold_start = True # ensure each test has cold start - metrics.clear_default_dimensions() # remove persisted default dimensions, if any - yield - ``` +```python title="Clearing metrics between tests" +@pytest.fixture(scope="function", autouse=True) +def reset_metric_set(): + # Clear out every metric data prior to every test + metrics = Metrics() + metrics.clear_metrics() + metrics_global.is_cold_start = True # ensure each test has cold start + metrics.clear_default_dimensions() # remove persisted default dimensions, if any + yield +``` ### Functional testing @@ -465,4 +446,5 @@ As metrics are logged to standard output, you can read standard output and asser assert "SuccessfulBooking" in custom_metrics_blob # as per previous example ``` -!!! tip "For more elaborate assertions and comparisons, check out [our functional testing for Metrics utility](https://github.com/awslabs/aws-lambda-powertools-python/blob/develop/tests/functional/test_metrics.py)" +???+ tip + For more elaborate assertions and comparisons, check out [our functional testing for Metrics utility.](https://github.com/awslabs/aws-lambda-powertools-python/blob/develop/tests/functional/test_metrics.py) diff --git a/docs/core/tracer.md b/docs/core/tracer.md index 9e94d2549d9..f97e4477acf 100644 --- a/docs/core/tracer.md +++ b/docs/core/tracer.md @@ -20,42 +20,36 @@ Tracer is an opinionated thin wrapper for [AWS X-Ray Python SDK](https://github. Before your use this utility, your AWS Lambda function [must have permissions](https://docs.aws.amazon.com/lambda/latest/dg/services-xray.html#services-xray-permissions) to send traces to AWS X-Ray. -> Example using AWS Serverless Application Model (SAM) - -=== "template.yml" - - ```yaml hl_lines="6 9" - Resources: - HelloWorldFunction: - Type: AWS::Serverless::Function - Properties: - Runtime: python3.8 - Tracing: Active - Environment: - Variables: - POWERTOOLS_SERVICE_NAME: example - ``` +```yaml hl_lines="6 9" title="AWS Serverless Application Model (SAM) example" +Resources: + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Runtime: python3.8 + Tracing: Active + Environment: + Variables: + POWERTOOLS_SERVICE_NAME: example +``` ### Lambda handler -You can quickly start by importing the `Tracer` class, initialize it outside the Lambda handler, and use `capture_lambda_handler` decorator. - -=== "app.py" +You can quickly start by initializing `Tracer` and use `capture_lambda_handler` decorator for your Lambda handler. - ```python hl_lines="1 3 6" - from aws_lambda_powertools import Tracer +```python hl_lines="1 3 6" title="Tracing Lambda handler with capture_lambda_handler" +from aws_lambda_powertools import Tracer - tracer = Tracer() # Sets service via env var - # OR tracer = Tracer(service="example") +tracer = Tracer() # Sets service via env var +# OR tracer = Tracer(service="example") - @tracer.capture_lambda_handler - def handler(event, context): - charge_id = event.get('charge_id') - payment = collect_payment(charge_id) - ... - ``` +@tracer.capture_lambda_handler +def handler(event, context): + charge_id = event.get('charge_id') + payment = collect_payment(charge_id) + ... 
+``` -When using this `capture_lambda_handler` decorator, Tracer performs these additional tasks to ease operations: +`capture_lambda_handler` performs these additional tasks to ease operations: * Creates a `ColdStart` annotation to easily filter traces that have had an initialization overhead * Creates a `Service` annotation if `service` parameter or `POWERTOOLS_SERVICE_NAME` is set @@ -65,57 +59,53 @@ When using this `capture_lambda_handler` decorator, Tracer performs these additi **Annotations** are key-values associated with traces and indexed by AWS X-Ray. You can use them to filter traces and to create [Trace Groups](https://aws.amazon.com/about-aws/whats-new/2018/11/aws-xray-adds-the-ability-to-group-traces/) to slice and dice your transactions. -**Metadata** are key-values also associated with traces but not indexed by AWS X-Ray. You can use them to add additional context for an operation using any native object. - -=== "Annotations" - You can add annotations using `put_annotation` method. +```python hl_lines="7" title="Adding annotations with put_annotation method" +from aws_lambda_powertools import Tracer +tracer = Tracer() - ```python hl_lines="7" - from aws_lambda_powertools import Tracer - tracer = Tracer() +@tracer.capture_lambda_handler +def handler(event, context): + ... + tracer.put_annotation(key="PaymentStatus", value="SUCCESS") +``` - @tracer.capture_lambda_handler - def handler(event, context): - ... - tracer.put_annotation(key="PaymentStatus", value="SUCCESS") - ``` -=== "Metadata" - You can add metadata using `put_metadata` method. +**Metadata** are key-values also associated with traces but not indexed by AWS X-Ray. You can use them to add additional context for an operation using any native object. - ```python hl_lines="8" - from aws_lambda_powertools import Tracer - tracer = Tracer() +```python hl_lines="8" title="Adding arbitrary metadata with put_metadata method" +from aws_lambda_powertools import Tracer +tracer = Tracer() - @tracer.capture_lambda_handler - def handler(event, context): - ... - ret = some_logic() - tracer.put_metadata(key="payment_response", value=ret) - ``` +@tracer.capture_lambda_handler +def handler(event, context): + ... + ret = some_logic() + tracer.put_metadata(key="payment_response", value=ret) +``` ### Synchronous functions You can trace synchronous functions using the `capture_method` decorator. -!!! warning - **When `capture_response` is enabled, the function response will be read and serialized as json.** +```python hl_lines="7 13" title="Tracing an arbitrary function with capture_method" +@tracer.capture_method +def collect_payment(charge_id): + ret = requests.post(PAYMENT_ENDPOINT) # logic + tracer.put_annotation("PAYMENT_STATUS", "SUCCESS") # custom annotation + return ret +``` - The serialization is performed by the aws-xray-sdk which uses the `jsonpickle` module. This can cause - unintended consequences if there are side effects to recursively reading the returned value, for example if the - decorated function response contains a file-like object or a `StreamingBody` for S3 objects. +???+ note "Note: Function responses are auto-captured and stored as JSON, by default." + + Use [capture_response](#disabling-response-auto-capture) parameter to override this behaviour. + + The serialization is performed by aws-xray-sdk via `jsonpickle` module. This can cause + side effects for file-like objects like boto S3 `StreamingBody`, where its response will be read only once during serialization. 
- ```python hl_lines="7 13" - @tracer.capture_method - def collect_payment(charge_id): - ret = requests.post(PAYMENT_ENDPOINT) # logic - tracer.put_annotation("PAYMENT_STATUS", "SUCCESS") # custom annotation - return ret - ``` ### Asynchronous and generator functions -!!! warning - **We do not support async Lambda handler** - Lambda handler itself must be synchronous +???+ warning + We do not support asynchronous Lambda handler You can trace asynchronous functions and generator functions (including context managers) using `capture_method`. @@ -164,21 +154,6 @@ You can trace asynchronous functions and generator functions (including context ... ``` -The decorator will detect whether your function is asynchronous, a generator, or a context manager and adapt its behaviour accordingly. - -=== "app.py" - - ```python - @tracer.capture_lambda_handler - def handler(evt, ctx): - asyncio.run(collect_payment()) - - with collect_payment_ctxman as result: - do_something_with(result) - - another_result = list(collect_payment_gen()) - ``` - ## Advanced ### Patching modules @@ -187,26 +162,21 @@ Tracer automatically patches all [supported libraries by X-Ray](https://docs.aws If you're looking to shave a few microseconds, or milliseconds depending on your function memory configuration, you can patch specific modules using `patch_modules` param: -=== "app.py" +```python hl_lines="7" title="Example of explicitly patching boto3 and requests only" +import boto3 +import requests - ```python hl_lines="7" - import boto3 - import requests +from aws_lambda_powertools import Tracer - from aws_lambda_powertools import Tracer - - modules_to_be_patched = ["boto3", "requests"] - tracer = Tracer(patch_modules=modules_to_be_patched) - ``` +modules_to_be_patched = ["boto3", "requests"] +tracer = Tracer(patch_modules=modules_to_be_patched) +``` ### Disabling response auto-capture -> New in 1.9.0 - Use **`capture_response=False`** parameter in both `capture_lambda_handler` and `capture_method` decorators to instruct Tracer **not** to serialize function responses as metadata. -!!! info "This is commonly useful in three scenarios" - +???+ info "Info: This is useful in three common scenarios" 1. You might **return sensitive** information you don't want it to be added to your traces 2. You might manipulate **streaming objects that can be read only once**; this prevents subsequent calls from being empty 3. You might return **more than 64K** of data _e.g., `message too long` error_ @@ -238,48 +208,41 @@ Use **`capture_response=False`** parameter in both `capture_lambda_handler` and ### Disabling exception auto-capture -> New in 1.10.0 - Use **`capture_error=False`** parameter in both `capture_lambda_handler` and `capture_method` decorators to instruct Tracer **not** to serialize exceptions as metadata. -!!! info "Commonly useful in one scenario" - - 1. 
You might **return sensitive** information from exceptions, stack traces you might not control +???+ info + Useful when returning sensitive information in exceptions/stack traces you don't control -=== "sensitive_data_exception.py" +```python hl_lines="3 5" title="Disabling exception auto-capture for tracing metadata" +from aws_lambda_powertools import Tracer - ```python hl_lines="3 5" - from aws_lambda_powertools import Tracer - - @tracer.capture_lambda_handler(capture_error=False) - def handler(event, context): - raise ValueError("some sensitive info in the stack trace...") - ``` +@tracer.capture_lambda_handler(capture_error=False) +def handler(event, context): + raise ValueError("some sensitive info in the stack trace...") +``` ### Tracing aiohttp requests -!!! info - This snippet assumes you have **aiohttp** as a dependency +???+ info + This snippet assumes you have aiohttp as a dependency You can use `aiohttp_trace_config` function to create a valid [aiohttp trace_config object](https://docs.aiohttp.org/en/stable/tracing_reference.html). This is necessary since X-Ray utilizes aiohttp trace hooks to capture requests end-to-end. -=== "aiohttp_example.py" +```python hl_lines="5 10" title="Tracing aiohttp requests" +import asyncio +import aiohttp - ```python hl_lines="5 10" - import asyncio - import aiohttp +from aws_lambda_powertools import Tracer +from aws_lambda_powertools.tracing import aiohttp_trace_config - from aws_lambda_powertools import Tracer - from aws_lambda_powertools.tracing import aiohttp_trace_config +tracer = Tracer() - tracer = Tracer() - - async def aiohttp_task(): - async with aiohttp.ClientSession(trace_configs=[aiohttp_trace_config()]) as session: - async with session.get("https://httpbin.org/json") as resp: - resp = await resp.json() - return resp - ``` +async def aiohttp_task(): + async with aiohttp.ClientSession(trace_configs=[aiohttp_trace_config()]) as session: + async with session.get("https://httpbin.org/json") as resp: + resp = await resp.json() + return resp +``` ### Escape hatch mechanism @@ -287,59 +250,57 @@ You can use `tracer.provider` attribute to access all methods provided by AWS X- This is useful when you need a feature available in X-Ray that is not available in the Tracer utility, for example [thread-safe](https://github.com/aws/aws-xray-sdk-python/#user-content-trace-threadpoolexecutor), or [context managers](https://github.com/aws/aws-xray-sdk-python/#user-content-start-a-custom-segmentsubsegment). -=== "escape_hatch_context_manager_example.py" +```python hl_lines="7" title="Tracing a code block with in_subsegment escape hatch" +from aws_lambda_powertools import Tracer - ```python hl_lines="7" - from aws_lambda_powertools import Tracer +tracer = Tracer() - tracer = Tracer() - - @tracer.capture_lambda_handler - def handler(event, context): - with tracer.provider.in_subsegment('## custom subsegment') as subsegment: - ret = some_work() - subsegment.put_metadata('response', ret) - ``` +@tracer.capture_lambda_handler +def handler(event, context): + with tracer.provider.in_subsegment('## custom subsegment') as subsegment: + ret = some_work() + subsegment.put_metadata('response', ret) +``` ### Concurrent asynchronous functions -!!! 
warning - [As of now, X-Ray SDK will raise an exception when async functions are run and traced concurrently](https://github.com/aws/aws-xray-sdk-python/issues/164) +???+ warning + [X-Ray SDK will raise an exception](https://github.com/aws/aws-xray-sdk-python/issues/164) when async functions are run and traced concurrently A safe workaround mechanism is to use `in_subsegment_async` available via Tracer escape hatch (`tracer.provider`). -=== "concurrent_async_workaround.py" +```python hl_lines="6 7 12 15 17" title="Workaround to safely trace async concurrent functions" +import asyncio - ```python hl_lines="6 7 12 15 17" - import asyncio +from aws_lambda_powertools import Tracer +tracer = Tracer() - from aws_lambda_powertools import Tracer - tracer = Tracer() +async def another_async_task(): + async with tracer.provider.in_subsegment_async("## another_async_task") as subsegment: + subsegment.put_annotation(key="key", value="value") + subsegment.put_metadata(key="key", value="value", namespace="namespace") + ... - async def another_async_task(): - async with tracer.provider.in_subsegment_async("## another_async_task") as subsegment: - subsegment.put_annotation(key="key", value="value") - subsegment.put_metadata(key="key", value="value", namespace="namespace") - ... - - async def another_async_task_2(): - ... +async def another_async_task_2(): + ... - @tracer.capture_method - async def collect_payment(charge_id): - asyncio.gather(another_async_task(), another_async_task_2()) - ... - ``` +@tracer.capture_method +async def collect_payment(charge_id): + asyncio.gather(another_async_task(), another_async_task_2()) + ... +``` ### Reusing Tracer across your code Tracer keeps a copy of its configuration after the first initialization. This is useful for scenarios where you want to use Tracer in more than one location across your code base. -!!! warning - When reusing Tracer in Lambda Layers, or in multiple modules, **do not set `auto_patch=False`**, because import order matters. +???+ warning "Warning: Import order matters when using Lambda Layers or multiple modules" + **Do not set `auto_patch=False`** when reusing Tracer in Lambda Layers, or in multiple modules. This can result in the first Tracer config being inherited by new instances, and their modules not being patched. + Tracer will automatically ignore imported modules that have been patched. + === "handler.py" ```python hl_lines="2 4 9" diff --git a/docs/index.md b/docs/index.md index 03d89725b7b..5683fdd38be 100644 --- a/docs/index.md +++ b/docs/index.md @@ -5,7 +5,8 @@ description: AWS Lambda Powertools Python A suite of utilities for AWS Lambda functions to ease adopting best practices such as tracing, structured logging, custom metrics, and more. -!!! tip "Looking for a quick read through how the core features are used?" +???+ tip "Tip: Looking for a quick read through how the core features are used?" + Check out [this detailed blog post](https://aws.amazon.com/blogs/opensource/simplifying-serverless-best-practices-with-lambda-powertools/) with a practical example. 
## Tenets @@ -23,7 +24,7 @@ This project separates core utilities that will be available in other runtimes v Powertools is available in the following formats: -* **Lambda Layer**: [**arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:4**](#){: .copyMe} :clipboard: +* **Lambda Layer**: [**arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:**](#){: .copyMe} * **PyPi**: **`pip install aws-lambda-powertools`** ### Lambda Layer @@ -32,27 +33,27 @@ Powertools is available in the following formats: You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https://docs.aws.amazon.com/lambda/latest/dg/invocation-layers.html#invocation-layers-using){target="_blank"}, or your preferred deployment framework. -??? note "Expand to copy any regional Lambda Layer ARN" +??? note "Note: Expand to copy any regional Lambda Layer ARN" | Region | Layer ARN |--------------------------- | --------------------------- - | `us-east-1` | [arn:aws:lambda:us-east-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `us-east-2` | [arn:aws:lambda:us-east-2:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `us-west-1` | [arn:aws:lambda:us-west-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `us-west-2` | [arn:aws:lambda:us-west-2:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-south-1` | [arn:aws:lambda:ap-south-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-northeast-1` | [arn:aws:lambda:ap-northeast-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-northeast-2` | [arn:aws:lambda:ap-northeast-2:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-northeast-3` | [arn:aws:lambda:ap-northeast-3:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-southeast-1` | [arn:aws:lambda:ap-southeast-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ap-southeast-2` | [arn:aws:lambda:ap-southeast-2:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `eu-central-1` | [arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `eu-west-1` | [arn:aws:lambda:eu-west-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `eu-west-2` | [arn:aws:lambda:eu-west-2:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `eu-west-3` | [arn:aws:lambda:eu-west-3:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `eu-north-1` | [arn:aws:lambda:eu-north-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `ca-central-1` | [arn:aws:lambda:ca-central-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: - | `sa-east-1` | [arn:aws:lambda:sa-east-1:017000801446:layer:AWSLambdaPowertoolsPython:4](#){: .copyMe} :clipboard: + | `us-east-1` | [arn:aws:lambda:us-east-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `us-east-2` | [arn:aws:lambda:us-east-2:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `us-west-1` | [arn:aws:lambda:us-west-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `us-west-2` | [arn:aws:lambda:us-west-2:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-south-1` | 
[arn:aws:lambda:ap-south-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-northeast-1` | [arn:aws:lambda:ap-northeast-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-northeast-2` | [arn:aws:lambda:ap-northeast-2:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-northeast-3` | [arn:aws:lambda:ap-northeast-3:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-southeast-1` | [arn:aws:lambda:ap-southeast-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ap-southeast-2` | [arn:aws:lambda:ap-southeast-2:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `eu-central-1` | [arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `eu-west-1` | [arn:aws:lambda:eu-west-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `eu-west-2` | [arn:aws:lambda:eu-west-2:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `eu-west-3` | [arn:aws:lambda:eu-west-3:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `eu-north-1` | [arn:aws:lambda:eu-north-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `ca-central-1` | [arn:aws:lambda:ca-central-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} + | `sa-east-1` | [arn:aws:lambda:sa-east-1:017000801446:layer:AWSLambdaPowertoolsPython:6 :clipboard:](#){: .copyMe} === "SAM" @@ -61,7 +62,7 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: Type: AWS::Serverless::Function Properties: Layers: - - !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPython:4 + - !Sub arn:aws:lambda:${AWS::Region}:017000801446:layer:AWSLambdaPowertoolsPython:6 ``` === "Serverless framework" @@ -71,7 +72,7 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: hello: handler: lambda_function.lambda_handler layers: - - arn:aws:lambda:${aws:region}:017000801446:layer:AWSLambdaPowertoolsPython:4 + - arn:aws:lambda:${aws:region}:017000801446:layer:AWSLambdaPowertoolsPython:6 ``` === "CDK" @@ -87,7 +88,7 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: powertools_layer = aws_lambda.LayerVersion.from_layer_version_arn( self, id="lambda-powertools", - layer_version_arn=f"arn:aws:lambda:{env.region}:017000801446:layer:AWSLambdaPowertoolsPython:4" + layer_version_arn=f"arn:aws:lambda:{env.region}:017000801446:layer:AWSLambdaPowertoolsPython:6" ) aws_lambda.Function(self, 'sample-app-lambda', @@ -136,7 +137,7 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: role = aws_iam_role.iam_for_lambda.arn handler = "index.test" runtime = "python3.9" - layers = ["arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:4"] + layers = ["arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:6"] source_code_hash = filebase64sha256("lambda_function_payload.zip") } @@ -155,7 +156,7 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: ? Do you want to configure advanced settings? Yes ... ? Do you want to enable Lambda layers for this function? Yes - ? Enter up to 5 existing Lambda layer ARNs (comma-separated): arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:4 + ? 
Enter up to 5 existing Lambda layer ARNs (comma-separated): arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:6 ❯ amplify push -y @@ -166,16 +167,20 @@ You can include Lambda Powertools Lambda Layer using [AWS Lambda Console](https: - Name: ? Which setting do you want to update? Lambda layers configuration ? Do you want to enable Lambda layers for this function? Yes - ? Enter up to 5 existing Lambda layer ARNs (comma-separated): arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:4 + ? Enter up to 5 existing Lambda layer ARNs (comma-separated): arn:aws:lambda:eu-central-1:017000801446:layer:AWSLambdaPowertoolsPython:6 ? Do you want to edit the local lambda function now? No ``` === "Get the Layer .zip contents" Change {region} to your AWS region, e.g. `eu-west-1` - **`aws lambda get-layer-version-by-arn --arn arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:4 --region {region}`** + ```bash title="AWS CLI" + aws lambda get-layer-version-by-arn --arn arn:aws:lambda:{region}:017000801446:layer:AWSLambdaPowertoolsPython:6 --region {region} + ``` + + The pre-signed URL to download this Lambda Layer will be within `Location` key. -!!! warning "Limitations" +???+ warning "Warning: Limitations" Container Image deployment (OCI) or inline Lambda functions do not support Lambda Layers. @@ -193,10 +198,12 @@ Despite having more steps compared to the [public Layer ARN](#lambda-layer) opti | [aws-lambda-powertools-python-layer](https://serverlessrepo.aws.amazon.com/applications/eu-west-1/057560766410/aws-lambda-powertools-python-layer) | [arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer](#){: .copyMe} :clipboard: | Core dependencies only; sufficient for nearly all utilities. | [aws-lambda-powertools-python-layer-extras](https://serverlessrepo.aws.amazon.com/applications/eu-west-1/057560766410/aws-lambda-powertools-python-layer-extras) | [arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer-extras](#){: .copyMe} :clipboard: | Core plus extra dependencies such as `pydantic` that is required by `parser` utility. -!!! warning +???+ warning **Layer-extras** does not support Python 3.6 runtime. This layer also includes all extra dependencies: `22.4MB zipped`, `~155MB unzipped`. -!!! tip "You can create a shared Lambda Layers stack and make this along with other account level layers stack." +???+ tip + You can create a shared Lambda Layers stack and make this along with other account level layers stack. + If using SAM, you can include this SAR App as part of your shared Layers stack, and lock to a specific semantic version. Once deployed, it'll be available across the account this is deployed to. 
@@ -208,7 +215,7 @@ If using SAM, you can include this SAR App as part of your shared Layers stack, Properties: Location: ApplicationId: arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer - SemanticVersion: 1.22.0 # change to latest semantic version available in SAR + SemanticVersion: 1.23.0 # change to latest semantic version available in SAR MyLambdaFunction: Type: AWS::Serverless::Function @@ -236,7 +243,7 @@ If using SAM, you can include this SAR App as part of your shared Layers stack, Location: ApplicationId: arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer # Find latest from github.com/awslabs/aws-lambda-powertools-python/releases - SemanticVersion: 1.22.0 + SemanticVersion: 1.23.0 ``` === "CDK" @@ -246,7 +253,7 @@ If using SAM, you can include this SAR App as part of your shared Layers stack, POWERTOOLS_BASE_NAME = 'AWSLambdaPowertools' # Find latest from github.com/awslabs/aws-lambda-powertools-python/releases - POWERTOOLS_VER = '1.22.0' + POWERTOOLS_VER = '1.23.0' POWERTOOLS_ARN = 'arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer' class SampleApp(core.Construct): @@ -324,7 +331,7 @@ If using SAM, you can include this SAR App as part of your shared Layers stack, } ``` -??? tip "Example of least-privileged IAM permissions to deploy Layer" +??? example "Example: Least-privileged IAM permissions to deploy Layer" > Credits to [mwarkentin](https://github.com/mwarkentin) for providing the scoped down IAM permissions. @@ -389,24 +396,18 @@ If using SAM, you can include this SAR App as part of your shared Layers stack, - Ref: "PowertoolsLayerIamRole" ``` -You can fetch available versions via SAR API with: - -=== "shell" +You can fetch available versions via SAR ListApplicationVersions API: - ```bash - aws serverlessrepo list-application-versions \ - --application-id arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer - ``` +```bash title="AWS CLI example" +aws serverlessrepo list-application-versions \ + --application-id arn:aws:serverlessrepo:eu-west-1:057560766410:applications/aws-lambda-powertools-python-layer +``` ## Quick getting started -**Quick hello world example using SAM CLI** - -=== "shell" - - ```bash - sam init --location https://github.com/aws-samples/cookiecutter-aws-sam-python - ``` +```bash title="Hello world example using SAM CLI" +sam init --location https://github.com/aws-samples/cookiecutter-aws-sam-python +``` ## Features @@ -429,8 +430,8 @@ You can fetch available versions via SAR API with: ## Environment variables -!!! info - **Explicit parameters take precedence over environment variables.** +???+ info + Explicit parameters take precedence over environment variables | Environment variable | Description | Utility | Default | | ------------------------------------------------- | --------------------------------------------------------------------------------- | --------------------------------------------------------------------------------- | ------------------------------------------------- | @@ -448,12 +449,10 @@ You can fetch available versions via SAR API with: ## Debug mode -As a best practice, AWS Lambda Powertools logging statements are suppressed. If necessary, you can enable debugging using `set_package_logger`: +As a best practice, AWS Lambda Powertools module logging statements are suppressed. 
If necessary, you can enable debugging using `set_package_logger` for additional information on every internal operation: -=== "app.py" +```python title="Powertools debug mode example" +from aws_lambda_powertools.logging.logger import set_package_logger - ```python - from aws_lambda_powertools.logging.logger import set_package_logger - - set_package_logger() - ``` +set_package_logger() +``` diff --git a/docs/stylesheets/extra.css b/docs/stylesheets/extra.css index 0d37af692cb..194e4e2ba08 100644 --- a/docs/stylesheets/extra.css +++ b/docs/stylesheets/extra.css @@ -38,3 +38,8 @@ cursor: pointer; border-bottom: 0.1px dashed black; } + +p > code, +li > code { + font-weight: bold +} diff --git a/docs/utilities/batch.md b/docs/utilities/batch.md index 56ab160e9f9..cdfb51549ae 100644 --- a/docs/utilities/batch.md +++ b/docs/utilities/batch.md @@ -1,148 +1,1259 @@ --- -title: SQS Batch Processing +title: Batch Processing description: Utility --- -The SQS batch processing utility provides a way to handle partial failures when processing batches of messages from SQS. +The batch processing utility handles partial failures when processing batches from Amazon SQS, Amazon Kinesis Data Streams, and Amazon DynamoDB Streams. ## Key Features -* Prevent successfully processed messages being returned to SQS -* Simple interface for individually processing messages from a batch -* Build your own batch processor using the base classes +* Reports batch item failures to reduce number of retries for a record upon errors +* Simple interface to process each batch record +* Integrates with [Event Source Data Classes](./data_classes.md){target="_blank} and [Parser (Pydantic)](parser.md){target="_blank} for self-documenting record schema +* Build your own batch processor by extending primitives ## Background -When using SQS as a Lambda event source mapping, Lambda functions are triggered with a batch of messages from SQS. +When using SQS, Kinesis Data Streams, or DynamoDB Streams as a Lambda event source, your Lambda functions are triggered with a batch of messages. -If your function fails to process any message from the batch, the entire batch returns to your SQS queue, and your Lambda function is triggered with the same batch one more time. +If your function fails to process any message from the batch, the entire batch returns to your queue or stream. This same batch is then retried until either condition happens first: **a)** your Lambda function returns a successful response, **b)** record reaches maximum retry attempts, or **c)** when records expire. -With this utility, messages within a batch are handled individually - only messages that were not successfully processed -are returned to the queue. +With this utility, batch records are processed individually – only messages that failed to be processed return to the queue or stream for a further retry. This works when two mechanisms are in place: -!!! warning - While this utility lowers the chance of processing messages more than once, it is not guaranteed. We recommend implementing processing logic in an idempotent manner wherever possible. +1. `ReportBatchItemFailures` is set in your SQS, Kinesis, or DynamoDB event source properties +2. 
[A specific response](https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html#sqs-batchfailurereporting-syntax){target="_blank"} is returned so Lambda knows which records should not be deleted during partial responses - More details on how Lambda works with SQS can be found in the [AWS documentation](https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html) +???+ warning "Warning: This utility lowers the chance of processing records more than once; it does not guarantee it" + We recommend implementing processing logic in an [idempotent manner](idempotency.md){target="_blank"} wherever possible. + + You can find more details on how Lambda works with either [SQS](https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html){target="_blank"}, [Kinesis](https://docs.aws.amazon.com/lambda/latest/dg/with-kinesis.html){target="_blank"}, or [DynamoDB](https://docs.aws.amazon.com/lambda/latest/dg/with-ddb.html){target="_blank"} in the AWS Documentation. ## Getting started -### IAM Permissions +Regardless whether you're using SQS, Kinesis Data Streams or DynamoDB Streams, you must configure your Lambda function event source to use ``ReportBatchItemFailures`. + +You do not need any additional IAM permissions to use this utility, except for what each event source requires. + +### Required resources -Before your use this utility, your AWS Lambda function must have `sqs:DeleteMessageBatch` permission to delete successful messages directly from the queue. +The remaining sections of the documentation will rely on these samples. For completeness, this demonstrates IAM permissions and Dead Letter Queue where batch records will be sent after 2 retries were attempted. -> Example using AWS Serverless Application Model (SAM) -=== "template.yml" +=== "SQS" + + ```yaml title="template.yaml" hl_lines="31-32" + AWSTemplateFormatVersion: '2010-09-09' + Transform: AWS::Serverless-2016-10-31 + Description: partial batch response sample + + Globals: + Function: + Timeout: 5 + MemorySize: 256 + Runtime: python3.9 + Tracing: Active + Environment: + Variables: + LOG_LEVEL: INFO + POWERTOOLS_SERVICE_NAME: hello - ```yaml hl_lines="2-3 12-15" Resources: - MyQueue: - Type: AWS::SQS::Queue + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Handler: app.lambda_handler + CodeUri: hello_world + Policies: + - SQSPollerPolicy: + QueueName: !GetAtt SampleQueue.QueueName + Events: + Batch: + Type: SQS + Properties: + Queue: !GetAtt SampleQueue.Arn + FunctionResponseTypes: + - ReportBatchItemFailures + + SampleDLQ: + Type: AWS::SQS::Queue + + SampleQueue: + Type: AWS::SQS::Queue + Properties: + VisibilityTimeout: 30 # Fn timeout * 6 + RedrivePolicy: + maxReceiveCount: 2 + deadLetterTargetArn: !GetAtt SampleDLQ.Arn + ``` + +=== "Kinesis Data Streams" + + ```yaml title="template.yaml" hl_lines="44-45" + AWSTemplateFormatVersion: '2010-09-09' + Transform: AWS::Serverless-2016-10-31 + Description: partial batch response sample + + Globals: + Function: + Timeout: 5 + MemorySize: 256 + Runtime: python3.9 + Tracing: Active + Environment: + Variables: + LOG_LEVEL: INFO + POWERTOOLS_SERVICE_NAME: hello + Resources: HelloWorldFunction: - Type: AWS::Serverless::Function - Properties: - Runtime: python3.8 + Type: AWS::Serverless::Function + Properties: + Handler: app.lambda_handler + CodeUri: hello_world + Policies: + # Lambda Destinations require additional permissions + # to send failure records to DLQ from Kinesis/DynamoDB + - Version: "2012-10-17" + Statement: + Effect: "Allow" + Action: + - sqs:GetQueueAttributes + - 
sqs:GetQueueUrl + - sqs:SendMessage + Resource: !GetAtt SampleDLQ.Arn + Events: + KinesisStream: + Type: Kinesis + Properties: + Stream: !GetAtt SampleStream.Arn + BatchSize: 100 + StartingPosition: LATEST + MaximumRetryAttempts: 2 + DestinationConfig: + OnFailure: + Destination: !GetAtt SampleDLQ.Arn + FunctionResponseTypes: + - ReportBatchItemFailures + + SampleDLQ: + Type: AWS::SQS::Queue + + SampleStream: + Type: AWS::Kinesis::Stream + Properties: + ShardCount: 1 + ``` + +=== "DynamoDB Streams" + + ```yaml title="template.yaml" hl_lines="43-44" + AWSTemplateFormatVersion: '2010-09-09' + Transform: AWS::Serverless-2016-10-31 + Description: partial batch response sample + + Globals: + Function: + Timeout: 5 + MemorySize: 256 + Runtime: python3.9 + Tracing: Active Environment: - Variables: - POWERTOOLS_SERVICE_NAME: example - Policies: - - SQSPollerPolicy: - QueueName: - !GetAtt MyQueue.QueueName + Variables: + LOG_LEVEL: INFO + POWERTOOLS_SERVICE_NAME: hello + + Resources: + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Handler: app.lambda_handler + CodeUri: hello_world + Policies: + # Lambda Destinations require additional permissions + # to send failure records from Kinesis/DynamoDB + - Version: "2012-10-17" + Statement: + Effect: "Allow" + Action: + - sqs:GetQueueAttributes + - sqs:GetQueueUrl + - sqs:SendMessage + Resource: !GetAtt SampleDLQ.Arn + Events: + DynamoDBStream: + Type: DynamoDB + Properties: + Stream: !GetAtt SampleTable.StreamArn + StartingPosition: LATEST + MaximumRetryAttempts: 2 + DestinationConfig: + OnFailure: + Destination: !GetAtt SampleDLQ.Arn + FunctionResponseTypes: + - ReportBatchItemFailures + + SampleDLQ: + Type: AWS::SQS::Queue + + SampleTable: + Type: AWS::DynamoDB::Table + Properties: + BillingMode: PAY_PER_REQUEST + AttributeDefinitions: + - AttributeName: pk + AttributeType: S + - AttributeName: sk + AttributeType: S + KeySchema: + - AttributeName: pk + KeyType: HASH + - AttributeName: sk + KeyType: RANGE + SSESpecification: + SSEEnabled: yes + StreamSpecification: + StreamViewType: NEW_AND_OLD_IMAGES + ``` ### Processing messages from SQS -You can use either `sqs_batch_processor` decorator, or `PartialSQSProcessor` as a context manager if you'd like access to the processed results. +Processing batches from SQS works in four stages: -You need to create a function to handle each record from the batch - We call it `record_handler` from here on. +1. Instantiate **`BatchProcessor`** and choose **`EventType.SQS`** for the event type +2. Define your function to handle each batch record, and use [`SQSRecord`](data_classes.md#sqs){target="_blank"} type annotation for autocompletion +3. Use either **`batch_processor`** decorator or your instantiated processor as a context manager to kick off processing +4. Return the appropriate response contract to Lambda via **`.response()`** processor method -=== "Decorator" +???+ info + This code example optionally uses Tracer and Logger for completion. 
- ```python hl_lines="3 6" - from aws_lambda_powertools.utilities.batch import sqs_batch_processor +=== "As a decorator" - def record_handler(record): - return do_something_with(record["body"]) + ```python hl_lines="4-5 9 15 23 25" + import json - @sqs_batch_processor(record_handler=record_handler) - def lambda_handler(event, context): - return {"statusCode": 200} + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.SQS) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: SQSRecord): + payload: str = record.body + if payload: + item: dict = json.loads(payload) + ... + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context: LambdaContext): + return processor.response() ``` -=== "Context manager" - ```python hl_lines="3 9 11-12" - from aws_lambda_powertools.utilities.batch import PartialSQSProcessor +=== "As a context manager" - def record_handler(record): - return_value = do_something_with(record["body"]) - return return_value + ```python hl_lines="4-5 9 15 24-26 28" + import json - def lambda_handler(event, context): - records = event["Records"] - processor = PartialSQSProcessor() + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord + from aws_lambda_powertools.utilities.typing import LambdaContext - with processor(records, record_handler) as proc: - result = proc.process() # Returns a list of all results from record_handler - return result + processor = BatchProcessor(event_type=EventType.SQS) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: SQSRecord): + payload: str = record.body + if payload: + item: dict = json.loads(payload) + ... + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + def lambda_handler(event, context: LambdaContext): + batch = event["Records"] + with processor(records=batch, processor=processor): + processed_messages = processor.process() # kick off processing, return list[tuple] + + return processor.response() + ``` + +=== "Sample response" + + The second record failed to be processed, therefore the processor added its message ID in the response. 
+ + ```python + { + 'batchItemFailures': [ + { + 'itemIdentifier': '244fc6b4-87a3-44ab-83d2-361172410c3a' + } + ] + } + ``` + +=== "Sample event" + + ```json + { + "Records": [ + { + "messageId": "059f36b4-87a3-44ab-83d2-661975830a7d", + "receiptHandle": "AQEBwJnKyrHigUMZj6rYigCgxlaS3SLy0a", + "body": "{\"Message\": \"success\"}", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1545082649183", + "SenderId": "AIDAIENQZJOLO23YVJ4VO", + "ApproximateFirstReceiveTimestamp": "1545082649185" + }, + "messageAttributes": {}, + "md5OfBody": "e4e68fb7bd0e697a0ae8f1bb342846b3", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-2: 123456789012:my-queue", + "awsRegion": "us-east-1" + }, + { + "messageId": "244fc6b4-87a3-44ab-83d2-361172410c3a", + "receiptHandle": "AQEBwJnKyrHigUMZj6rYigCgxlaS3SLy0a", + "body": "SGVsbG8sIHRoaXMgaXMgYSB0ZXN0Lg==", + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1545082649183", + "SenderId": "AIDAIENQZJOLO23YVJ4VO", + "ApproximateFirstReceiveTimestamp": "1545082649185" + }, + "messageAttributes": {}, + "md5OfBody": "e4e68fb7bd0e697a0ae8f1bb342846b3", + "eventSource": "aws:sqs", + "eventSourceARN": "arn:aws:sqs:us-east-2: 123456789012:my-queue", + "awsRegion": "us-east-1" + } + ] + } + ``` + +### Processing messages from Kinesis + +Processing batches from Kinesis works in four stages: + +1. Instantiate **`BatchProcessor`** and choose **`EventType.KinesisDataStreams`** for the event type +2. Define your function to handle each batch record, and use [`KinesisStreamRecord`](data_classes.md#kinesis-streams){target="_blank"} type annotation for autocompletion +3. Use either **`batch_processor`** decorator or your instantiated processor as a context manager to kick off processing +4. Return the appropriate response contract to Lambda via **`.response()`** processor method + +???+ info + This code example optionally uses Tracer and Logger for completion. + +=== "As a decorator" + + ```python hl_lines="4-5 9 15 22 24" + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.KinesisDataStreams) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: KinesisStreamRecord): + logger.info(record.kinesis.data_as_text) + payload: dict = record.kinesis.data_as_json() + ... 
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    @batch_processor(record_handler=record_handler, processor=processor)
+    def lambda_handler(event, context: LambdaContext):
+        return processor.response()
+    ```
+
+=== "As a context manager"
+
+    ```python hl_lines="4-5 9 15 23-25 27"
+    import json
+
+    from aws_lambda_powertools import Logger, Tracer
+    from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
+    from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord
+    from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+    processor = BatchProcessor(event_type=EventType.KinesisDataStreams)
+    tracer = Tracer()
+    logger = Logger()
+
+
+    @tracer.capture_method
+    def record_handler(record: KinesisStreamRecord):
+        logger.info(record.kinesis.data_as_text)
+        payload: dict = record.kinesis.data_as_json()
+        ...
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    def lambda_handler(event, context: LambdaContext):
+        batch = event["Records"]
+        with processor(records=batch, processor=processor):
+            processed_messages = processor.process() # kick off processing, return list[tuple]
+
+        return processor.response()
+    ```
+
+=== "Sample response"
+
+    The second record failed to be processed, therefore the processor added its sequence number in the response.
+
+    ```python
+    {
+        'batchItemFailures': [
+            {
+                'itemIdentifier': '6006958808509702859251049540584488075644979031228738'
+            }
+        ]
+    }
+    ```
+
+
+=== "Sample event"
+
+    ```json
+    {
+        "Records": [
+            {
+                "kinesis": {
+                    "kinesisSchemaVersion": "1.0",
+                    "partitionKey": "1",
+                    "sequenceNumber": "4107859083838847772757075850904226111829882106684065",
+                    "data": "eyJNZXNzYWdlIjogInN1Y2Nlc3MifQ==",
+                    "approximateArrivalTimestamp": 1545084650.987
+                },
+                "eventSource": "aws:kinesis",
+                "eventVersion": "1.0",
+                "eventID": "shardId-000000000006:4107859083838847772757075850904226111829882106684065",
+                "eventName": "aws:kinesis:record",
+                "invokeIdentityArn": "arn:aws:iam::123456789012:role/lambda-role",
+                "awsRegion": "us-east-2",
+                "eventSourceARN": "arn:aws:kinesis:us-east-2:123456789012:stream/lambda-stream"
+            },
+            {
+                "kinesis": {
+                    "kinesisSchemaVersion": "1.0",
+                    "partitionKey": "1",
+                    "sequenceNumber": "6006958808509702859251049540584488075644979031228738",
+                    "data": "c3VjY2Vzcw==",
+                    "approximateArrivalTimestamp": 1545084650.987
+                },
+                "eventSource": "aws:kinesis",
+                "eventVersion": "1.0",
+                "eventID": "shardId-000000000006:6006958808509702859251049540584488075644979031228738",
+                "eventName": "aws:kinesis:record",
+                "invokeIdentityArn": "arn:aws:iam::123456789012:role/lambda-role",
+                "awsRegion": "us-east-2",
+                "eventSourceARN": "arn:aws:kinesis:us-east-2:123456789012:stream/lambda-stream"
+            }
+        ]
+    }
+    ```
+
+
+### Processing messages from DynamoDB
+
+Processing batches from DynamoDB Streams works in four stages:
+
+1. Instantiate **`BatchProcessor`** and choose **`EventType.DynamoDBStreams`** for the event type
+2. Define your function to handle each batch record, and use [`DynamoDBRecord`](data_classes.md#dynamodb-streams){target="_blank"} type annotation for autocompletion
+3. Use either **`batch_processor`** decorator or your instantiated processor as a context manager to kick off processing
+4. Return the appropriate response contract to Lambda via **`.response()`** processor method
+
+???+ info
+    This code example optionally uses Tracer and Logger for completion.
+ +=== "As a decorator" + + ```python hl_lines="4-5 9 15 25 27" + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: DynamoDBRecord): + logger.info(record.dynamodb.new_image) + payload: dict = json.loads(record.dynamodb.new_image.get("Message").get_value) + # alternatively: + # changes: Dict[str, dynamo_db_stream_event.AttributeValue] = record.dynamodb.new_image + # payload = change.get("Message").raw_event -> {"S": ""} + ... + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context: LambdaContext): + return processor.response() ``` -!!! tip - **Any non-exception/successful return from your record handler function** will instruct both decorator and context manager to queue up each individual message for deletion. +=== "As a context manager" + + ```python hl_lines="4-5 9 15 26-28 30" + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + + + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + tracer = Tracer() + logger = Logger() - If the entire batch succeeds, we let Lambda to proceed in deleting the records from the queue for cost reasons. + + @tracer.capture_method + def record_handler(record: DynamoDBRecord): + logger.info(record.dynamodb.new_image) + payload: dict = json.loads(record.dynamodb.new_image.get("item").s_value) + # alternatively: + # changes: Dict[str, dynamo_db_stream_event.AttributeValue] = record.dynamodb.new_image + # payload = change.get("Message").raw_event -> {"S": ""} + ... + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + def lambda_handler(event, context: LambdaContext): + batch = event["Records"] + with processor(records=batch, processor=processor): + processed_messages = processor.process() # kick off processing, return list[tuple] + + return processor.response() + ``` + +=== "Sample response" + + The second record failed to be processed, therefore the processor added its sequence number in the response. 
+
+    ```python
+    {
+        'batchItemFailures': [
+            {
+                'itemIdentifier': '8640712661'
+            }
+        ]
+    }
+    ```
+
+
+=== "Sample event"
+
+    ```json
+    {
+        "Records": [
+            {
+                "eventID": "1",
+                "eventVersion": "1.0",
+                "dynamodb": {
+                    "Keys": {
+                        "Id": {
+                            "N": "101"
+                        }
+                    },
+                    "NewImage": {
+                        "Message": {
+                            "S": "failure"
+                        }
+                    },
+                    "StreamViewType": "NEW_AND_OLD_IMAGES",
+                    "SequenceNumber": "3275880929",
+                    "SizeBytes": 26
+                },
+                "awsRegion": "us-west-2",
+                "eventName": "INSERT",
+                "eventSourceARN": "eventsource_arn",
+                "eventSource": "aws:dynamodb"
+            },
+            {
+                "eventID": "1",
+                "eventVersion": "1.0",
+                "dynamodb": {
+                    "Keys": {
+                        "Id": {
+                            "N": "101"
+                        }
+                    },
+                    "NewImage": {
+                        "SomethingElse": {
+                            "S": "success"
+                        }
+                    },
+                    "StreamViewType": "NEW_AND_OLD_IMAGES",
+                    "SequenceNumber": "8640712661",
+                    "SizeBytes": 26
+                },
+                "awsRegion": "us-west-2",
+                "eventName": "INSERT",
+                "eventSourceARN": "eventsource_arn",
+                "eventSource": "aws:dynamodb"
+            }
+        ]
+    }
+    ```
 
 ### Partial failure mechanics
 
 All records in the batch will be passed to this handler for processing, even if exceptions are thrown - Here's the behaviour after completing the batch:
 
-* **Any successfully processed messages**, we will delete them from the queue via `sqs:DeleteMessageBatch`
-* **Any unprocessed messages detected**, we will raise `SQSBatchProcessingError` to ensure failed messages return to your SQS queue
+* **All records successfully processed**. We will return an empty list of item failures `{'batchItemFailures': []}`
+* **Partial success with some exceptions**. We will return a list of all item IDs/sequence numbers that failed processing
+* **All records failed to be processed**. We will raise `BatchProcessingError` exception with a list of all exceptions raised when processing
 
-!!! warning
-    You will not have accessed to the **processed messages** within the Lambda Handler.
+???+ warning
+    You will not have access to the **processed messages** within the Lambda Handler; use context manager for that.
 
     All processing logic will and should be performed by the `record_handler` function.
 
+
 ## Advanced
 
-### Choosing between decorator and context manager
+### Pydantic integration
+
+You can bring your own Pydantic models via **`model`** parameter when inheriting from **`SqsRecordModel`**, **`KinesisDataStreamRecord`**, or **`DynamoDBStreamRecordModel`**.
+
+Inheritance is important because we need to access message IDs and sequence numbers from these records in the event of failure. Mypy is fully integrated with this utility, so it should identify whether you're passing the incorrect Model.
+ + +=== "SQS" + + ```python hl_lines="5 9-10 12-19 21 27" + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.parser.models import SqsRecordModel + from aws_lambda_powertools.utilities.typing import LambdaContext + + + class Order(BaseModel): + item: dict + + class OrderSqsRecord(SqsRecordModel): + body: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("body", pre=True) + def transform_body_to_dict(cls, value: str): + return json.loads(value) + + processor = BatchProcessor(event_type=EventType.SQS, model=OrderSqsRecord) + tracer = Tracer() + logger = Logger() + + + @tracer.capture_method + def record_handler(record: OrderSqsRecord): + return record.body.item + + @logger.inject_lambda_context + @tracer.capture_lambda_handler + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context: LambdaContext): + return processor.response() + ``` + +=== "Kinesis Data Streams" + + ```python hl_lines="5 9-10 12-20 22-23 26 32" + import json + + from aws_lambda_powertools import Logger, Tracer + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + from aws_lambda_powertools.utilities.parser.models import KinesisDataStreamRecord + from aws_lambda_powertools.utilities.typing import LambdaContext + -They have nearly the same behaviour when it comes to processing messages from the batch: + class Order(BaseModel): + item: dict -* **Entire batch has been successfully processed**, where your Lambda handler returned successfully, we will let SQS delete the batch to optimize your cost -* **Entire Batch has been partially processed successfully**, where exceptions were raised within your `record handler`, we will: - * **1)** Delete successfully processed messages from the queue by directly calling `sqs:DeleteMessageBatch` - * **2)** Raise `SQSBatchProcessingError` to ensure failed messages return to your SQS queue + class OrderKinesisPayloadRecord(KinesisDataStreamRecordPayload): + data: Order -The only difference is that **PartialSQSProcessor** will give you access to processed messages if you need. 
+        # auto transform json string
+        # so Pydantic can auto-initialize nested Order model
+        @validator("data", pre=True)
+        def transform_message_to_dict(cls, value: str):
+            # Powertools KinesisDataStreamRecordModel already decodes b64 to str here
+            return json.loads(value)
+
+    class OrderKinesisRecord(KinesisDataStreamRecordModel):
+        kinesis: OrderKinesisPayloadRecord
+
+
+    processor = BatchProcessor(event_type=EventType.KinesisDataStreams, model=OrderKinesisRecord)
+    tracer = Tracer()
+    logger = Logger()
+
+
+    @tracer.capture_method
+    def record_handler(record: OrderKinesisRecord):
+        return record.kinesis.data.item
+
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    @batch_processor(record_handler=record_handler, processor=processor)
+    def lambda_handler(event, context: LambdaContext):
+        return processor.response()
+    ```
+
+=== "DynamoDB Streams"
+
+    ```python hl_lines="7 11-12 14-21 23-25 27-28 31 37"
+    import json
+
+    from typing import Dict, Literal
+
+    from aws_lambda_powertools import Logger, Tracer
+    from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
+    from aws_lambda_powertools.utilities.parser.models import DynamoDBStreamRecordModel
+    from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+    class Order(BaseModel):
+        item: dict
+
+    class OrderDynamoDB(BaseModel):
+        Message: Order
+
+        # auto transform json string
+        # so Pydantic can auto-initialize nested Order model
+        @validator("Message", pre=True)
+        def transform_message_to_dict(cls, value: Dict[Literal["S"], str]):
+            return json.loads(value["S"])
+
+    class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel):
+        NewImage: Optional[OrderDynamoDB]
+        OldImage: Optional[OrderDynamoDB]
+
+    class OrderDynamoDBRecord(DynamoDBStreamRecordModel):
+        dynamodb: OrderDynamoDBChangeRecord
+
+
+    processor = BatchProcessor(event_type=EventType.DynamoDBStreams, model=OrderDynamoDBRecord)
+    tracer = Tracer()
+    logger = Logger()
+
+
+    @tracer.capture_method
+    def record_handler(record: OrderDynamoDBRecord):
+        return record.dynamodb.NewImage.Message.item
+
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    @batch_processor(record_handler=record_handler, processor=processor)
+    def lambda_handler(event, context: LambdaContext):
+        return processor.response()
+    ```
 
 ### Accessing processed messages
 
-Use `PartialSQSProcessor` context manager to access a list of all return values from your `record_handler` function.
+Use the context manager to access a list of all returned values from your `record_handler` function.
+
+* **When successful**. We will include a tuple with `success`, the result of `record_handler`, and the batch record
+* **When failed**. We will include a tuple with `fail`, exception as a string, and the batch record
+
+
+```python hl_lines="31-38" title="Accessing processed messages via context manager"
+import json
+
+from typing import Any, List, Literal, Union
+
+from aws_lambda_powertools import Logger, Tracer
+from aws_lambda_powertools.utilities.batch import (BatchProcessor,
+                                                   EventType,
+                                                   FailureResponse,
+                                                   SuccessResponse,
+                                                   batch_processor)
+from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+processor = BatchProcessor(event_type=EventType.SQS)
+tracer = Tracer()
+logger = Logger()
+
+
+@tracer.capture_method
+def record_handler(record: SQSRecord):
+    payload: str = record.body
+    if payload:
+        item: dict = json.loads(payload)
+    ...
+
+@logger.inject_lambda_context
+@tracer.capture_lambda_handler
+def lambda_handler(event, context: LambdaContext):
+    batch = event["Records"]
+    with processor(records=batch, handler=record_handler):
+        processed_messages: List[Union[SuccessResponse, FailureResponse]] = processor.process()
+
+    for message in processed_messages:
+        status: Union[Literal["success"], Literal["fail"]] = message[0]
+        result: Any = message[1]
+        record: SQSRecord = message[2]
+
+
+    return processor.response()
+```
+
+
+### Extending BatchProcessor
+
+You might want to bring custom logic to the existing `BatchProcessor` to slightly override how we handle successes and failures.
+
+For these scenarios, you can subclass `BatchProcessor` and quickly override `success_handler` and `failure_handler` methods:
+
+* **`success_handler()`** – Keeps track of successful batch records
+* **`failure_handler()`** – Keeps track of failed batch records
+
+???+ example
+    Let's suppose you'd like to add a metric named `BatchRecordFailures` for each batch record that failed processing
-=== "app.py"
+```python title="Extending failure handling mechanism in BatchProcessor"
+
+import json
+
+from aws_lambda_powertools import Metrics, Tracer
+from aws_lambda_powertools.metrics import MetricUnit
+from aws_lambda_powertools.utilities.batch import batch_processor, BatchProcessor, ExceptionInfo, EventType, FailureResponse
+from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+class MyProcessor(BatchProcessor):
+    def failure_handler(self, record: SQSRecord, exception: ExceptionInfo) -> FailureResponse:
+        metrics.add_metric(name="BatchRecordFailures", unit=MetricUnit.Count, value=1)
+        return super().failure_handler(record, exception)
+
+processor = MyProcessor(event_type=EventType.SQS)
+metrics = Metrics(namespace="test")
+tracer = Tracer()
+
+
+@tracer.capture_method
+def record_handler(record: SQSRecord):
+    payload: str = record.body
+    if payload:
+        item: dict = json.loads(payload)
+    ...
+
+@metrics.log_metrics(capture_cold_start_metric=True)
+@batch_processor(record_handler=record_handler, processor=processor)
+def lambda_handler(event, context: LambdaContext):
+    return processor.response()
+```
+
+### Create your own partial processor
+
+You can create your own partial batch processor from scratch by inheriting the `BasePartialProcessor` class, and implementing `_prepare()`, `_clean()` and `_process_record()`.
+
+* **`_process_record()`** – handles all processing logic for each individual message of a batch, including calling the `record_handler` (self.handler)
+* **`_prepare()`** – called once as part of the processor initialization
+* **`_clean()`** – teardown logic called once after `_process_record` completes
+
+You can then use this class as a context manager, or pass it to `batch_processor` to use as a decorator on your Lambda handler function.
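+
+For illustration only, here is a minimal sketch of both usage styles. It assumes the `MyPartialProcessor` class and `table_name` defined in the full example below, and a hypothetical `do_something_with` helper for your own business logic:
+
+```python title="Using a custom partial processor as a decorator or as a context manager (sketch)"
+from aws_lambda_powertools.utilities.batch import batch_processor
+
+
+def record_handler(record):
+    return do_something_with(record["body"])  # hypothetical business logic
+
+
+# Decorator style: the processor wraps the Lambda handler execution
+@batch_processor(record_handler=record_handler, processor=MyPartialProcessor(table_name))
+def lambda_handler(event, context):
+    return {"statusCode": 200}
+
+
+# Context manager style: keeps a reference to the processor for further inspection
+def lambda_handler_alternative(event, context):
+    processor = MyPartialProcessor(table_name)
+    with processor(event["Records"], record_handler) as proc:
+        proc.process()  # processes each record, collecting successes and failures
+    return {"statusCode": 200}
+```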
+
+```python hl_lines="3 9 24 30 37 57" title="Creating a custom batch processor"
+from random import randint
+
+from aws_lambda_powertools.utilities.batch import BasePartialProcessor, batch_processor
+import boto3
+import os
+
+table_name = os.getenv("TABLE_NAME", "table_not_found")
+
+class MyPartialProcessor(BasePartialProcessor):
+    """
+    Process a record and store successful results in an Amazon DynamoDB table
+
+    Parameters
+    ----------
+    table_name: str
+        DynamoDB table name to write results to
+    """
+
+    def __init__(self, table_name: str):
+        self.table_name = table_name
+
+        super().__init__()
+
+    def _prepare(self):
+        # It's called once, *before* processing
+        # Creates table resource and clears previous results
+        self.ddb_table = boto3.resource("dynamodb").Table(self.table_name)
+        self.success_messages.clear()
+
+    def _clean(self):
+        # It's called once, *after* processing all records (closing the context manager)
+        # Here we're sending, at once, all successful messages to a ddb table
+        with self.ddb_table.batch_writer() as batch:
+            for result in self.success_messages:
+                batch.put_item(Item=result)
+
+    def _process_record(self, record):
+        # It handles how your record is processed
+        # Here we're keeping the status of each run
+        # where self.handler is the record_handler function passed as an argument
+        try:
+            result = self.handler(record) # record_handler passed to decorator/context manager
+            return self.success_handler(record, result)
+        except Exception as exc:
+            return self.failure_handler(record, exc)
+
+    def success_handler(self, record, result):
+        entry = ("success", result, record)
+        message = {"age": result}
+        self.success_messages.append(message)
+        return entry
+
+
+def record_handler(record):
+    return randint(0, 100)
+
+@batch_processor(record_handler=record_handler, processor=MyPartialProcessor(table_name))
+def lambda_handler(event, context):
+    return {"statusCode": 200}
+```
+
+### Caveats
+
+#### Tracer response auto-capture for large batch sizes
+
+When using Tracer to capture responses for each batch record processing, you might exceed 64K of tracing data depending on what you return from your `record_handler` function, or how big your batch size is.
+
+If that's the case, you can configure [Tracer to disable response auto-capturing](../core/tracer.md#disabling-response-auto-capture){target="_blank"}.
+
+
+```python hl_lines="14" title="Disabling Tracer response auto-capturing"
+import json
+
+from aws_lambda_powertools import Logger, Tracer
+from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
+from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord
+from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+processor = BatchProcessor(event_type=EventType.SQS)
+tracer = Tracer()
+logger = Logger()
+
+
+@tracer.capture_method(capture_response=False)
+def record_handler(record: SQSRecord):
+    payload: str = record.body
+    if payload:
+        item: dict = json.loads(payload)
+    ...
+
+@logger.inject_lambda_context
+@tracer.capture_lambda_handler
+@batch_processor(record_handler=record_handler, processor=processor)
+def lambda_handler(event, context: LambdaContext):
+    return processor.response()
+
+```
+
+## Testing your code
+
+As there are no external calls, you can unit test your code with `BatchProcessor` quite easily.
+
+**Example**:
+
+Given an SQS batch where the first batch record succeeds and the second fails processing, we should have a single item reported in the function response.
+
+=== "test_app.py"
 
     ```python
-    from aws_lambda_powertools.utilities.batch import PartialSQSProcessor
+    import json
+
+    from pathlib import Path
+    from dataclasses import dataclass
+
+    import pytest
+    from src import app
+
+
+    def load_event(path: Path):
+        with path.open() as f:
+            return json.load(f)
+
+
+    @pytest.fixture
+    def lambda_context():
+        @dataclass
+        class LambdaContext:
+            function_name: str = "test"
+            memory_limit_in_mb: int = 128
+            invoked_function_arn: str = "arn:aws:lambda:eu-west-1:809313241:function:test"
+            aws_request_id: str = "52fdfc07-2182-154f-163f-5f0f9a621d72"
+
+        return LambdaContext()
+
+    @pytest.fixture()
+    def sqs_event():
+        """Generates SQS event"""
+        return load_event(path=Path("events/sqs_event.json"))
+
+
+    def test_app_batch_partial_response(sqs_event, lambda_context):
+        # GIVEN
+        processor = app.processor # access processor for additional assertions
+        successful_record = sqs_event["Records"][0]
+        failed_record = sqs_event["Records"][1]
+        expected_response = {
+            "batchItemFailures": [
+                {
+                    "itemIdentifier": failed_record["messageId"]
+                }
+            ]
+        }
+
+        # WHEN
+        ret = app.lambda_handler(sqs_event, lambda_context)
+
+        # THEN
+        assert ret == expected_response
+        assert len(processor.fail_messages) == 1
+        assert processor.success_messages[0] == successful_record
+    ```
+
+=== "src/app.py"
+
+    ```python
+    import json
+
+    from aws_lambda_powertools import Logger, Tracer
+    from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
+    from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord
+    from aws_lambda_powertools.utilities.typing import LambdaContext
+
+
+    processor = BatchProcessor(event_type=EventType.SQS)
+    tracer = Tracer()
+    logger = Logger()
+
+
+    @tracer.capture_method
+    def record_handler(record: SQSRecord):
+        payload: str = record.body
+        if payload:
+            item: dict = json.loads(payload)
+        ...
+
+    @logger.inject_lambda_context
+    @tracer.capture_lambda_handler
+    @batch_processor(record_handler=record_handler, processor=processor)
+    def lambda_handler(event, context: LambdaContext):
+        return processor.response()
+    ```
+
+=== "Sample SQS event"
+
+    ```json title="events/sqs_event.json"
+    {
+        "Records": [
+            {
+                "messageId": "059f36b4-87a3-44ab-83d2-661975830a7d",
+                "receiptHandle": "AQEBwJnKyrHigUMZj6rYigCgxlaS3SLy0a",
+                "body": "{\"Message\": \"success\"}",
+                "attributes": {
+                    "ApproximateReceiveCount": "1",
+                    "SentTimestamp": "1545082649183",
+                    "SenderId": "AIDAIENQZJOLO23YVJ4VO",
+                    "ApproximateFirstReceiveTimestamp": "1545082649185"
+                },
+                "messageAttributes": {},
+                "md5OfBody": "e4e68fb7bd0e697a0ae8f1bb342846b3",
+                "eventSource": "aws:sqs",
+                "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:my-queue",
+                "awsRegion": "us-east-1"
+            },
+            {
+                "messageId": "244fc6b4-87a3-44ab-83d2-361172410c3a",
+                "receiptHandle": "AQEBwJnKyrHigUMZj6rYigCgxlaS3SLy0a",
+                "body": "SGVsbG8sIHRoaXMgaXMgYSB0ZXN0Lg==",
+                "attributes": {
+                    "ApproximateReceiveCount": "1",
+                    "SentTimestamp": "1545082649183",
+                    "SenderId": "AIDAIENQZJOLO23YVJ4VO",
+                    "ApproximateFirstReceiveTimestamp": "1545082649185"
+                },
+                "messageAttributes": {},
+                "md5OfBody": "e4e68fb7bd0e697a0ae8f1bb342846b3",
+                "eventSource": "aws:sqs",
+                "eventSourceARN": "arn:aws:sqs:us-east-2:123456789012:my-queue",
+                "awsRegion": "us-east-1"
+            }
+        ]
+    }
+    ```
+
+
+
+## FAQ
+
+### Choosing between decorator and context manager
+
+Use the context manager when you want access to the processed messages, or to handle the `BatchProcessingError` exception when all records within the batch fail to be processed.
+
+### Integrating exception handling with Sentry.io
+
+When using Sentry.io for error monitoring, you can override `failure_handler` to capture each processing exception with the Sentry SDK:
+
+> Credits to [Charles-Axel Dein](https://github.com/awslabs/aws-lambda-powertools-python/issues/293#issuecomment-781961732)
+
+```python hl_lines="4 7-8" title="Integrating error tracking with Sentry.io"
+from typing import Tuple
+
+from aws_lambda_powertools.utilities.batch import BatchProcessor, FailureResponse
+from sentry_sdk import capture_exception
+
+
+class MyProcessor(BatchProcessor):
+    def failure_handler(self, record, exception) -> FailureResponse:
+        capture_exception()  # send exception to Sentry
+        return super().failure_handler(record, exception)
+```
+
+
+## Legacy
+
+???+ tip
+    This is kept for historical purposes. Use the new [BatchProcessor](#processing-messages-from-sqs) instead.
+
+
+### Migration guide
+
+???+ info
+    Keep reading if you are using `sqs_batch_processor` or `PartialSQSProcessor`.
+
+[As of Nov 2021](https://aws.amazon.com/about-aws/whats-new/2021/11/aws-lambda-partial-batch-response-sqs-event-source/){target="_blank"}, this is no longer needed as SQS, Kinesis, and DynamoDB Streams offer this capability natively with one caveat - it's an [opt-in feature](#required-resources).
+
+Being a native feature, we no longer need to instantiate boto3 or apply other customizations like exception suppressing – this lowers the cost of your Lambda function, as you can delegate deleting partial failures to Lambda.
+
+???+ tip
+    It's also easier to test since it's mostly a [contract based response](https://docs.aws.amazon.com/lambda/latest/dg/with-sqs.html#sqs-batchfailurereporting-syntax){target="_blank"}.
+
+You can migrate in three steps:
+
+1. If you are using **`sqs_batch_processor`** you can now use the **`batch_processor`** decorator
+2. 
If you were using **`PartialSQSProcessor`** you can now use **`BatchProcessor`** +3. Change your Lambda Handler to return the new response format + + +=== "Decorator: Before" + + ```python hl_lines="1 6" + from aws_lambda_powertools.utilities.batch import sqs_batch_processor + + def record_handler(record): + return do_something_with(record["body"]) + + @sqs_batch_processor(record_handler=record_handler) + def lambda_handler(event, context): + return {"statusCode": 200} + ``` + +=== "Decorator: After" + + ```python hl_lines="3 5 11" + import json + + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + + processor = BatchProcessor(event_type=EventType.SQS) + def record_handler(record): return do_something_with(record["body"]) + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + ``` + + +=== "Context manager: Before" + + ```python hl_lines="1-2 4 14 19" + from aws_lambda_powertools.utilities.batch import PartialSQSProcessor + from botocore.config import Config + + config = Config(region_name="us-east-1") + + def record_handler(record): + return_value = do_something_with(record["body"]) + return return_value + + def lambda_handler(event, context): records = event["Records"] - processor = PartialSQSProcessor() + processor = PartialSQSProcessor(config=config) - with processor(records, record_handler) as proc: - result = proc.process() # Returns a list of all results from record_handler + with processor(records, record_handler): + result = processor.process() return result ``` +=== "Context manager: After" + + ```python hl_lines="1 11" + from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor + + + def record_handler(record): + return_value = do_something_with(record["body"]) + return return_value + + def lambda_handler(event, context): + records = event["Records"] + + processor = BatchProcessor(event_type=EventType.SQS) + + with processor(records, record_handler): + result = processor.process() + + return processor.response() + ``` + ### Customizing boto configuration The **`config`** and **`boto3_session`** parameters enable you to pass in a custom [botocore config object](https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html) @@ -267,98 +1378,3 @@ If you want to disable the default behavior where `SQSBatchProcessingError` is r with processor(records, record_handler): result = processor.process() ``` - -### Create your own partial processor - -You can create your own partial batch processor by inheriting the `BasePartialProcessor` class, and implementing `_prepare()`, `_clean()` and `_process_record()`. - -* **`_process_record()`** - Handles all processing logic for each individual message of a batch, including calling the `record_handler` (self.handler) -* **`_prepare()`** - Called once as part of the processor initialization -* **`clean()`** - Teardown logic called once after `_process_record` completes - -You can then use this class as a context manager, or pass it to `batch_processor` to use as a decorator on your Lambda handler function. 
- -=== "custom_processor.py" - - ```python hl_lines="3 9 24 30 37 57" - from random import randint - - from aws_lambda_powertools.utilities.batch import BasePartialProcessor, batch_processor - import boto3 - import os - - table_name = os.getenv("TABLE_NAME", "table_not_found") - - class MyPartialProcessor(BasePartialProcessor): - """ - Process a record and stores successful results at a Amazon DynamoDB Table - - Parameters - ---------- - table_name: str - DynamoDB table name to write results to - """ - - def __init__(self, table_name: str): - self.table_name = table_name - - super().__init__() - - def _prepare(self): - # It's called once, *before* processing - # Creates table resource and clean previous results - self.ddb_table = boto3.resource("dynamodb").Table(self.table_name) - self.success_messages.clear() - - def _clean(self): - # It's called once, *after* closing processing all records (closing the context manager) - # Here we're sending, at once, all successful messages to a ddb table - with ddb_table.batch_writer() as batch: - for result in self.success_messages: - batch.put_item(Item=result) - - def _process_record(self, record): - # It handles how your record is processed - # Here we're keeping the status of each run - # where self.handler is the record_handler function passed as an argument - try: - result = self.handler(record) # record_handler passed to decorator/context manager - return self.success_handler(record, result) - except Exception as exc: - return self.failure_handler(record, exc) - - def success_handler(self, record): - entry = ("success", result, record) - message = {"age": result} - self.success_messages.append(message) - return entry - - - def record_handler(record): - return randint(0, 100) - - @batch_processor(record_handler=record_handler, processor=MyPartialProcessor(table_name)) - def lambda_handler(event, context): - return {"statusCode": 200} - ``` - -### Integrating exception handling with Sentry.io - -When using Sentry.io for error monitoring, you can override `failure_handler` to include to capture each processing exception: - -> Credits to [Charles-Axel Dein](https://github.com/awslabs/aws-lambda-powertools-python/issues/293#issuecomment-781961732) - -=== "sentry_integration.py" - - ```python hl_lines="4 7-8" - from typing import Tuple - - from aws_lambda_powertools.utilities.batch import PartialSQSProcessor - from sentry_sdk import capture_exception - - class SQSProcessor(PartialSQSProcessor): - def failure_handler(self, record: Event, exception: Tuple) -> Tuple: # type: ignore - capture_exception() # send exception to Sentry - logger.exception("got exception while processing SQS message") - return super().failure_handler(record, exception) # type: ignore - ``` diff --git a/docs/utilities/data_classes.md b/docs/utilities/data_classes.md index cbe874d4b94..8353d904bb1 100644 --- a/docs/utilities/data_classes.md +++ b/docs/utilities/data_classes.md @@ -80,7 +80,7 @@ Event Source | Data_class [SNS](#sns) | `SNSEvent` [SQS](#sqs) | `SQSEvent` -!!! info +???+ info The examples provided below are far from exhaustive - the data classes themselves are designed to provide a form of documentation inherently (via autocompletion, types and docstrings). @@ -542,8 +542,8 @@ Verify Auth Challenge | `data_classes.cognito_user_pool_event.VerifyAuthChalleng #### Define Auth Challenge Example -!!! 
warning "NOTE" - In this example we are modifying the wrapped dict response fields, so we need to return the json serializable wrapped event in `event.raw_event` +???+ note + In this example we are modifying the wrapped dict response fields, so we need to return the json serializable wrapped event in `event.raw_event`. This example is based on the AWS Cognito docs for [Define Auth Challenge Lambda Trigger](https://docs.aws.amazon.com/cognito/latest/developerguide/user-pool-lambda-define-auth-challenge.html){target="_blank"}. diff --git a/docs/utilities/feature_flags.md b/docs/utilities/feature_flags.md index 47cc324f6ae..540521569a2 100644 --- a/docs/utilities/feature_flags.md +++ b/docs/utilities/feature_flags.md @@ -3,7 +3,8 @@ title: Feature flags description: Utility --- -!!! note "This is currently in Beta, as we might change Store parameters in the next release." +???+ note + This is currently in Beta, as we might change Store parameters in the next release. The feature flags utility provides a simple rule engine to define when one or multiple features should be enabled depending on the input. @@ -15,9 +16,11 @@ Feature flags are used to modify behaviour without changing the application's co **Dynamic flags**. Indicates something can have varying states, for example enable a premium feature for customer X not Y. -!!! tip "You can use [Parameters utility](parameters.md) for static flags while this utility can do both static and dynamic feature flags." +???+ tip + You can use [Parameters utility](parameters.md) for static flags while this utility can do both static and dynamic feature flags. -!!! warning "Be mindful that feature flags can increase the complexity of your application over time; use them sparingly." +???+ warning + Be mindful that feature flags can increase the complexity of your application over time; use them sparingly. If you want to learn more about feature flags, their variations and trade-offs, check these articles: @@ -430,19 +433,17 @@ This utility expects a certain schema to be stored as JSON within AWS AppConfig. A feature can simply have its name and a `default` value. This is either on or off, also known as a [static flag](#static-flags). -=== "minimal_schema.json" - - ```json hl_lines="2-3 6-7" - { - "global_feature": { - "default": true - }, - "non_boolean_global_feature": { - "default": {"group": "read-only"}, - "boolean_type": False - }, - } - ``` +```json hl_lines="2-3 5-7" title="minimal_schema.json" +{ + "global_feature": { + "default": True + }, + "non_boolean_global_feature": { + "default": {"group": "read-only"}, + "boolean_type": False + }, +} +``` If you need more control and want to provide context such as user group, permissions, location, etc., you need to add rules to your feature flag configuration. @@ -454,42 +455,40 @@ When adding `rules` to a feature, they must contain: 2. `when_match` boolean or JSON value that should be used when conditions match 3. 
A list of `conditions` for evaluation -=== "feature_with_rules.json" - - ```json hl_lines="4-11 19-26" - { - "premium_feature": { - "default": false, - "rules": { - "customer tier equals premium": { - "when_match": true, - "conditions": [ - { - "action": "EQUALS", - "key": "tier", - "value": "premium" - } - ] - } - } - }, - "non_boolean_premium_feature": { - "default": [], - "rules": { - "customer tier equals premium": { - "when_match": ["remove_limits", "remove_ads"], - "conditions": [ - { - "action": "EQUALS", - "key": "tier", - "value": "premium" - } - ] - } - } - } - } - ``` + ```json hl_lines="4-11 19-26" title="feature_with_rules.json" + { + "premium_feature": { + "default": false, + "rules": { + "customer tier equals premium": { + "when_match": true, + "conditions": [ + { + "action": "EQUALS", + "key": "tier", + "value": "premium" + } + ] + } + } + }, + "non_boolean_premium_feature": { + "default": [], + "rules": { + "customer tier equals premium": { + "when_match": ["remove_limits", "remove_ads"], + "conditions": [ + { + "action": "EQUALS", + "key": "tier", + "value": "premium" + } + ] + } + } + } + } + ``` You can have multiple rules with different names. The rule engine will return the first result `when_match` of the matching rule configuration, or `default` value when none of the rules apply. @@ -497,20 +496,18 @@ You can have multiple rules with different names. The rule engine will return th The `conditions` block is a list of conditions that contain `action`, `key`, and `value` keys: -=== "conditions.json" - - ```json hl_lines="5-7" - { - ... - "conditions": [ - { - "action": "EQUALS", - "key": "tier", - "value": "premium" - } - ] - } - ``` +```json hl_lines="5-7" title="conditions.json" +{ + ... + "conditions": [ + { + "action": "EQUALS", + "key": "tier", + "value": "premium" + } + ] +} +``` The `action` configuration can have the following values, where the expressions **`a`** is the `key` and **`b`** is the `value` above: @@ -530,7 +527,8 @@ Action | Equivalent expression **VALUE_NOT_IN_KEY** | `lambda a, b: b not in a` -!!! info "The `**key**` and `**value**` will be compared to the input from the `**context**` parameter." +???+ info + The `**key**` and `**value**` will be compared to the input from the `**context**` parameter. **For multiple conditions**, we will evaluate the list of conditions as a logical `AND`, so all conditions needs to match to return `when_match` value. @@ -593,7 +591,8 @@ For this to work, you need to use a JMESPath expression via the `envelope` param ### Built-in store provider -!!! info "For GA, you'll be able to bring your own store." +???+ info + For GA, you'll be able to bring your own store. #### AppConfig @@ -612,36 +611,35 @@ Parameter | Default | Description **jmespath_options** | `None` | For advanced use cases when you want to bring your own [JMESPath functions](https://github.com/jmespath/jmespath.py#custom-functions){target="_blank"} **logger** | `logging.Logger` | Logger to use for debug. You can optionally supply an instance of Powertools Logger. 
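+
+For a quick start, a minimal sketch that only sets the required parameters and wires the store into `FeatureFlags` (values are illustrative); the sample that follows layers on caching, envelope, boto, and JMESPath options:
+
+```python title="Minimal AppConfigStore setup (sketch)"
+from aws_lambda_powertools.utilities.feature_flags import AppConfigStore, FeatureFlags
+
+# Fetch the feature flags JSON configuration from AWS AppConfig
+app_config = AppConfigStore(
+    environment="dev",
+    application="product-catalogue",
+    name="configuration",
+)
+
+feature_flags = FeatureFlags(store=app_config)
+```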
-=== "appconfig_store_example.py" - ```python hl_lines="19-25" - from botocore.config import Config +```python hl_lines="19-25" title="AppConfigStore sample" +from botocore.config import Config - import jmespath +import jmespath - boto_config = Config(read_timeout=10, retries={"total_max_attempts": 2}) +boto_config = Config(read_timeout=10, retries={"total_max_attempts": 2}) - # Custom JMESPath functions - class CustomFunctions(jmespath.functions.Functions): +# Custom JMESPath functions +class CustomFunctions(jmespath.functions.Functions): - @jmespath.functions.signature({'types': ['string']}) - def _func_special_decoder(self, s): - return my_custom_decoder_logic(s) + @jmespath.functions.signature({'types': ['string']}) + def _func_special_decoder(self, s): + return my_custom_decoder_logic(s) - custom_jmespath_options = {"custom_functions": CustomFunctions()} +custom_jmespath_options = {"custom_functions": CustomFunctions()} - app_config = AppConfigStore( - environment="dev", - application="product-catalogue", - name="configuration", - max_age=120, - envelope = "features", - sdk_config=boto_config, - jmespath_options=custom_jmespath_options - ) - ``` +app_config = AppConfigStore( + environment="dev", + application="product-catalogue", + name="configuration", + max_age=120, + envelope = "features", + sdk_config=boto_config, + jmespath_options=custom_jmespath_options +) +``` ## Testing your code @@ -649,62 +647,61 @@ You can unit test your feature flags locally and independently without setting u `AppConfigStore` only fetches a JSON document with a specific schema. This allows you to mock the response and use it to verify the rule evaluation. -!!! warning "This excerpt relies on `pytest` and `pytest-mock` dependencies" - -=== "test_feature_flags_independently.py" - - ```python hl_lines="9-11" - from typing import Dict, List, Optional - - from aws_lambda_powertools.utilities.feature_flags import FeatureFlags, AppConfigStore, RuleAction - - - def init_feature_flags(mocker, mock_schema, envelope="") -> FeatureFlags: - """Mock AppConfig Store get_configuration method to use mock schema instead""" - - method_to_mock = "aws_lambda_powertools.utilities.feature_flags.AppConfigStore.get_configuration" - mocked_get_conf = mocker.patch(method_to_mock) - mocked_get_conf.return_value = mock_schema - - app_conf_store = AppConfigStore( - environment="test_env", - application="test_app", - name="test_conf_name", - envelope=envelope, - ) - - return FeatureFlags(store=app_conf_store) - - - def test_flags_condition_match(mocker): - # GIVEN - expected_value = True - mocked_app_config_schema = { - "my_feature": { - "default": expected_value, - "rules": { - "tenant id equals 12345": { - "when_match": True, - "conditions": [ - { - "action": RuleAction.EQUALS.value, - "key": "tenant_id", - "value": "12345", - } - ], - } - }, - } - } - - # WHEN - ctx = {"tenant_id": "12345", "username": "a"} - feature_flags = init_feature_flags(mocker=mocker, mock_schema=mocked_app_config_schema) - flag = feature_flags.evaluate(name="my_feature", context=ctx, default=False) - - # THEN - assert flag == expected_value - ``` +???+ warning + This excerpt relies on `pytest` and `pytest-mock` dependencies. 
+ +```python hl_lines="9-11" title="Unit testing feature flags" +from typing import Dict, List, Optional + +from aws_lambda_powertools.utilities.feature_flags import FeatureFlags, AppConfigStore, RuleAction + + +def init_feature_flags(mocker, mock_schema, envelope="") -> FeatureFlags: + """Mock AppConfig Store get_configuration method to use mock schema instead""" + + method_to_mock = "aws_lambda_powertools.utilities.feature_flags.AppConfigStore.get_configuration" + mocked_get_conf = mocker.patch(method_to_mock) + mocked_get_conf.return_value = mock_schema + + app_conf_store = AppConfigStore( + environment="test_env", + application="test_app", + name="test_conf_name", + envelope=envelope, + ) + + return FeatureFlags(store=app_conf_store) + + +def test_flags_condition_match(mocker): + # GIVEN + expected_value = True + mocked_app_config_schema = { + "my_feature": { + "default": expected_value, + "rules": { + "tenant id equals 12345": { + "when_match": True, + "conditions": [ + { + "action": RuleAction.EQUALS.value, + "key": "tenant_id", + "value": "12345", + } + ], + } + }, + } + } + + # WHEN + ctx = {"tenant_id": "12345", "username": "a"} + feature_flags = init_feature_flags(mocker=mocker, mock_schema=mocked_app_config_schema) + flag = feature_flags.evaluate(name="my_feature", context=ctx, default=False) + + # THEN + assert flag == expected_value +``` ## Feature flags vs Parameters vs env vars diff --git a/docs/utilities/idempotency.md b/docs/utilities/idempotency.md index 18a99b53999..4b03b66abd4 100644 --- a/docs/utilities/idempotency.md +++ b/docs/utilities/idempotency.md @@ -40,45 +40,41 @@ Configuration | Value | Notes Partition key | `id` | TTL attribute name | `expiration` | This can only be configured after your table is created if you're using AWS Console -!!! tip "You can share a single state table for all functions" +???+ tip "Tip: You can share a single state table for all functions" You can reuse the same DynamoDB table to store idempotency state. We add your `function_name` in addition to the idempotency key as a hash key. -> Example using AWS Serverless Application Model (SAM) - -=== "template.yml" - - ```yaml hl_lines="5-13 21-23" - Resources: - IdempotencyTable: - Type: AWS::DynamoDB::Table - Properties: - AttributeDefinitions: - - AttributeName: id - AttributeType: S - KeySchema: - - AttributeName: id - KeyType: HASH - TimeToLiveSpecification: - AttributeName: expiration - Enabled: true - BillingMode: PAY_PER_REQUEST - - HelloWorldFunction: - Type: AWS::Serverless::Function - Properties: - Runtime: python3.8 - ... - Policies: - - DynamoDBCrudPolicy: - TableName: !Ref IdempotencyTable - ``` +```yaml hl_lines="5-13 21-23" title="AWS Serverless Application Model (SAM) example" +Resources: + IdempotencyTable: + Type: AWS::DynamoDB::Table + Properties: + AttributeDefinitions: + - AttributeName: id + AttributeType: S + KeySchema: + - AttributeName: id + KeyType: HASH + TimeToLiveSpecification: + AttributeName: expiration + Enabled: true + BillingMode: PAY_PER_REQUEST + + HelloWorldFunction: + Type: AWS::Serverless::Function + Properties: + Runtime: python3.8 + ... + Policies: + - DynamoDBCrudPolicy: + TableName: !Ref IdempotencyTable +``` -!!! warning "Large responses with DynamoDB persistence layer" +???+ warning "Warning: Large responses with DynamoDB persistence layer" When using this utility with DynamoDB, your function's responses must be [smaller than 400KB](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Limits.html#limits-items). 
Larger items cannot be written to DynamoDB and will cause exceptions. -!!! info "DynamoDB " +???+ info "Info: DynamoDB" Each function invocation will generally make 2 requests to DynamoDB. If the result returned by your Lambda is less than 1kb, you can expect 2 WCUs per invocation. For retried invocations, you will see 1WCU and 1RCU. Review the [DynamoDB pricing documentation](https://aws.amazon.com/dynamodb/pricing/) to @@ -124,45 +120,51 @@ You can quickly start by initializing the `DynamoDBPersistenceLayer` class and u Similar to [idempotent decorator](#idempotent-decorator), you can use `idempotent_function` decorator for any synchronous Python function. -When using `idempotent_function`, you must tell us which keyword parameter in your function signature has the data we should use via **`data_keyword_argument`** - Such data must be JSON serializable. +When using `idempotent_function`, you must tell us which keyword parameter in your function signature has the data we should use via **`data_keyword_argument`**. -!!! warning "Make sure to call your decorated function using keyword arguments" +!!! info "We support JSON serializable data, [Python Dataclasses](https://docs.python.org/3.7/library/dataclasses.html){target="_blank"}, [Parser/Pydantic Models](parser.md){target="_blank"}, and our [Event Source Data Classes](./data_classes.md){target="_blank"}." -=== "app.py" +???+ warning + Make sure to call your decorated function using keyword arguments - This example also demonstrates how you can integrate with [Batch utility](batch.md), so you can process each record in an idempotent manner. +=== "batch_sample.py" - ```python hl_lines="4 13 18 25" - import uuid + This example also demonstrates how you can integrate with [Batch utility](batch.md), so you can process each record in an idempotent manner. 
- from aws_lambda_powertools.utilities.batch import sqs_batch_processor - from aws_lambda_powertools.utilities.idempotency import idempotent_function, DynamoDBPersistenceLayer, IdempotencyConfig + ```python hl_lines="4-5 16 21 29" + from aws_lambda_powertools.utilities.batch import (BatchProcessor, EventType, + batch_processor) + from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + processor = BatchProcessor(event_type=EventType.SQS) dynamodb = DynamoDBPersistenceLayer(table_name="idem") config = IdempotencyConfig( - event_key_jmespath="messageId", # see "Choosing a payload subset for idempotency" section + event_key_jmespath="messageId", # see Choosing a payload subset section use_local_cache=True, ) - @idempotent_function(data_keyword_argument="data", config=config, persistence_store=dynamodb) - def dummy(arg_one, arg_two, data: dict, **kwargs): - return {"data": data} - @idempotent_function(data_keyword_argument="record", config=config, persistence_store=dynamodb) - def record_handler(record): + def record_handler(record: SQSRecord): return {"message": record["body"]} - @sqs_batch_processor(record_handler=record_handler) + @idempotent_function(data_keyword_argument="data", config=config, persistence_store=dynamodb) + def dummy(arg_one, arg_two, data: dict, **kwargs): + return {"data": data} + + + @batch_processor(record_handler=record_handler, processor=processor) def lambda_handler(event, context): # `data` parameter must be called as a keyword argument to work dummy("hello", "universe", data="test") - return {"statusCode": 200} + return processor.response() ``` -=== "Example event" +=== "Batch event" ```json hl_lines="4" { @@ -193,9 +195,82 @@ When using `idempotent_function`, you must tell us which keyword parameter in yo } ``` +=== "dataclass_sample.py" + + ```python hl_lines="3-4 23 32" + from dataclasses import dataclass + + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + + dynamodb = DynamoDBPersistenceLayer(table_name="idem") + config = IdempotencyConfig( + event_key_jmespath="order_id", # see Choosing a payload subset section + use_local_cache=True, + ) + + @dataclass + class OrderItem: + sku: str + description: str + + @dataclass + class Order: + item: OrderItem + order_id: int + + + @idempotent_function(data_keyword_argument="order", config=config, persistence_store=dynamodb) + def process_order(order: Order): + return f"processed order {order.order_id}" + + + order_item = OrderItem(sku="fake", description="sample") + order = Order(item=order_item, order_id="fake-id") + + # `order` parameter must be called as a keyword argument to work + process_order(order=order) + ``` + +=== "parser_pydantic_sample.py" + + ```python hl_lines="1-2 22 31" + from aws_lambda_powertools.utilities.idempotency import ( + DynamoDBPersistenceLayer, IdempotencyConfig, idempotent_function) + from aws_lambda_powertools.utilities.parser import BaseModel + + dynamodb = DynamoDBPersistenceLayer(table_name="idem") + config = IdempotencyConfig( + event_key_jmespath="order_id", # see Choosing a payload subset section + use_local_cache=True, + ) + + + class OrderItem(BaseModel): + sku: str + description: str + + + class Order(BaseModel): + item: OrderItem + order_id: int + + + @idempotent_function(data_keyword_argument="order", config=config, persistence_store=dynamodb) + def 
process_order(order: Order): + return f"processed order {order.order_id}" + + + order_item = OrderItem(sku="fake", description="sample") + order = Order(item=order_item, order_id="fake-id") + + # `order` parameter must be called as a keyword argument to work + process_order(order=order) + ``` + ### Choosing a payload subset for idempotency -!!! tip "Dealing with always changing payloads" +???+ tip "Tip: Dealing with always changing payloads" When dealing with a more elaborate payload, where parts of the payload always change, you should use **`event_key_jmespath`** parameter. Use [`IdempotencyConfig`](#customizing-the-default-behavior) to instruct the idempotent decorator to only use a portion of your payload to verify whether a request is idempotent, and therefore it should not be retried. @@ -206,10 +281,10 @@ In this example, we have a Lambda handler that creates a payment for a user subs Imagine the function executes successfully, but the client never receives the response due to a connection issue. It is safe to retry in this instance, as the idempotent decorator will return a previously saved response. -!!! warning "Idempotency for JSON payloads" +???+ warning "Warning: Idempotency for JSON payloads" The payload extracted by the `event_key_jmespath` is treated as a string by default, so will be sensitive to differences in whitespace even when the JSON payload itself is identical. - To alter this behaviour, we can use the [JMESPath built-in function](jmespath_functions.md#powertools_json-function) `powertools_json()` to treat the payload as a JSON object rather than a string. + To alter this behaviour, we can use the [JMESPath built-in function](jmespath_functions.md#powertools_json-function) `powertools_json()` to treat the payload as a JSON object (dict) rather than a string. === "payment.py" @@ -284,7 +359,7 @@ This sequence diagram shows an example flow of what happens in the payment scena The client was successful in receiving the result after the retry. Since the Lambda handler was only executed once, our customer hasn't been charged twice. -!!! note +???+ note Bear in mind that the entire Lambda handler is treated as a single idempotent operation. If your Lambda handler can cause multiple side effects, consider splitting it into separate functions. ### Handling exceptions @@ -296,11 +371,10 @@ This means that new invocations will execute your code again despite having the ![Idempotent sequence exception](../media/idempotent_sequence_exception.png) If you are using `idempotent_function`, any unhandled exceptions that are raised _inside_ the decorated function will cause the record in the persistence layer to be deleted, and allow the function to be executed again if retried. -If an Exception is raised _outside_ the scope of the decorated function and after your function has been called, the persistent record will not be affected. In this case, idempotency will be maintained for your decorated function. Example: -=== "app.py" +If an Exception is raised _outside_ the scope of the decorated function and after your function has been called, the persistent record will not be affected. In this case, idempotency will be maintained for your decorated function. 
Example: -```python hl_lines="2-4 8-10" +```python hl_lines="2-4 8-10" title="Exception not affecting idempotency record sample" def lambda_handler(event, context): # If an exception is raised here, no idempotent record will ever get created as the # idempotent function does not get called @@ -319,7 +393,7 @@ def call_external_service(data: dict, **kwargs): return result.json() ``` -!!! warning +???+ warning **We will raise `IdempotencyPersistenceLayerError`** if any of the calls to the persistence layer fail unexpectedly. As this happens outside the scope of your decorated function, you are not able to catch it if you're using the `idempotent` decorator on your Lambda handler. @@ -330,20 +404,18 @@ def call_external_service(data: dict, **kwargs): This persistence layer is built-in, and you can either use an existing DynamoDB table or create a new one dedicated for idempotency state (recommended). -=== "app.py" - - ```python hl_lines="5-9" - from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer - - persistence_layer = DynamoDBPersistenceLayer( - table_name="IdempotencyTable", - key_attr="idempotency_key", - expiry_attr="expires_at", - status_attr="current_status", - data_attr="result_data", - validation_key_attr="validation_key", - ) - ``` +```python hl_lines="5-9" title="Customizing DynamoDBPersistenceLayer to suit your table structure" +from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer + +persistence_layer = DynamoDBPersistenceLayer( + table_name="IdempotencyTable", + key_attr="idempotency_key", + expiry_attr="expires_at", + status_attr="current_status", + data_attr="result_data", + validation_key_attr="validation_key", +) +``` When using DynamoDB as a persistence layer, you can alter the attribute names by passing these parameters when initializing the persistence layer: @@ -378,7 +450,8 @@ Parameter | Default | Description This utility will raise an **`IdempotencyAlreadyInProgressError`** exception if you receive **multiple invocations with the same payload while the first invocation hasn't completed yet**. -!!! info "If you receive `IdempotencyAlreadyInProgressError`, you can safely retry the operation." +???+ info + If you receive `IdempotencyAlreadyInProgressError`, you can safely retry the operation. This is a locking mechanism for correctness. Since we don't know the result from the first invocation yet, we can't safely allow another concurrent execution. @@ -386,69 +459,65 @@ This is a locking mechanism for correctness. Since we don't know the result from **By default, in-memory local caching is disabled**, since we don't know how much memory you consume per invocation compared to the maximum configured in your Lambda function. -!!! note "This in-memory cache is local to each Lambda execution environment" +???+ note "Note: This in-memory cache is local to each Lambda execution environment" This means it will be effective in cases where your function's concurrency is low in comparison to the number of "retry" invocations with the same payload, because cache might be empty. 
You can enable in-memory caching with the **`use_local_cache`** parameter: -=== "app.py" - - ```python hl_lines="8 11" - from aws_lambda_powertools.utilities.idempotency import ( - IdempotencyConfig, DynamoDBPersistenceLayer, idempotent - ) +```python hl_lines="8 11" title="Caching idempotent transactions in-memory to prevent multiple calls to storage" +from aws_lambda_powertools.utilities.idempotency import ( + IdempotencyConfig, DynamoDBPersistenceLayer, idempotent +) - persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") - config = IdempotencyConfig( - event_key_jmespath="body", - use_local_cache=True, - ) +persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") +config = IdempotencyConfig( + event_key_jmespath="body", + use_local_cache=True, +) - @idempotent(config=config, persistence_store=persistence_layer) - def handler(event, context): - ... - ``` +@idempotent(config=config, persistence_store=persistence_layer) +def handler(event, context): + ... +``` When enabled, the default is to cache a maximum of 256 records in each Lambda execution environment - You can change it with the **`local_cache_max_items`** parameter. ### Expiring idempotency records -!!! note +???+ note By default, we expire idempotency records after **an hour** (3600 seconds). In most cases, it is not desirable to store the idempotency records forever. Rather, you want to guarantee that the same payload won't be executed within a period of time. You can change this window with the **`expires_after_seconds`** parameter: -=== "app.py" +```python hl_lines="8 11" title="Adjusting cache TTL" +from aws_lambda_powertools.utilities.idempotency import ( + IdempotencyConfig, DynamoDBPersistenceLayer, idempotent +) - ```python hl_lines="8 11" - from aws_lambda_powertools.utilities.idempotency import ( - IdempotencyConfig, DynamoDBPersistenceLayer, idempotent - ) - - persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") - config = IdempotencyConfig( - event_key_jmespath="body", - expires_after_seconds=5*60, # 5 minutes - ) +persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") +config = IdempotencyConfig( + event_key_jmespath="body", + expires_after_seconds=5*60, # 5 minutes +) - @idempotent(config=config, persistence_store=persistence_layer) - def handler(event, context): - ... - ``` +@idempotent(config=config, persistence_store=persistence_layer) +def handler(event, context): + ... +``` This will mark any records older than 5 minutes as expired, and the lambda handler will be executed as normal if it is invoked with a matching payload. -!!! note "DynamoDB time-to-live field" +???+ note "Note: DynamoDB time-to-live field" This utility uses **`expiration`** as the TTL field in DynamoDB, as [demonstrated in the SAM example earlier](#required-resources). ### Payload validation -!!! question "What if your function is invoked with the same payload except some outer parameters have changed?" +???+ question "Question: What if your function is invoked with the same payload except some outer parameters have changed?" Example: A payment transaction for a given productID was requested twice for the same customer, **however the amount to be paid has changed in the second transaction**. -By default, we will return the same result as it returned before, however in this instance it may be misleading - We provide a fail fast payload validation to address this edge case. 
+By default, we will return the same result as it returned before, however in this instance it may be misleading; we provide a fail fast payload validation to address this edge case. With **`payload_validation_jmespath`**, you can provide an additional JMESPath expression to specify which part of the event body should be validated against previous idempotent invocations @@ -513,7 +582,7 @@ With **`payload_validation_jmespath`**, you can provide an additional JMESPath e In this example, the **`userDetail`** and **`productId`** keys are used as the payload to generate the idempotency key, as per **`event_key_jmespath`** parameter. -!!! note +???+ note If we try to send the same request but with a different amount, we will raise **`IdempotencyValidationError`**. Without payload validation, we would have returned the same result as we did for the initial request. Since we're also returning an amount in the response, this could be quite confusing for the client. @@ -618,25 +687,24 @@ The **`boto_config`** and **`boto3_session`** parameters enable you to pass in a ### Using a DynamoDB table with a composite primary key -If you wish to use this utility with a DynamoDB table that is configured with a composite primary key (uses both partition key and sort key), you -should set the `sort_key_attr` parameter when initializing your persistence layer. When this parameter is set, the partition key value for all idempotency entries -will be the same, with the idempotency key being saved as the sort key instead of the partition key. You can optionally set a static value for the partition -key using the `static_pk_value` parameter. If not specified, it will default to `idempotency#{LAMBDA_FUNCTION_NAME}`. +When using a composite primary key table (hash+range key), use `sort_key_attr` parameter when initializing your persistence layer. -=== "MyLambdaFunction" +With this setting, we will save the idempotency key in the sort key instead of the primary key. By default, the primary key will now be set to `idempotency#{LAMBDA_FUNCTION_NAME}`. - ```python hl_lines="5" - from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer, idempotent +You can optionally set a static value for the partition key using the `static_pk_value` parameter. - persistence_layer = DynamoDBPersistenceLayer( - table_name="IdempotencyTable", - sort_key_attr='sort_key') +```python hl_lines="5" title="Reusing a DynamoDB table that uses a composite primary key" +from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer, idempotent +persistence_layer = DynamoDBPersistenceLayer( + table_name="IdempotencyTable", + sort_key_attr='sort_key') - @idempotent(persistence_store=persistence_layer) - def handler(event, context): - return {"message": "success": "id": event['body']['id]} - ``` + +@idempotent(persistence_store=persistence_layer) +def handler(event, context): + return {"message": "success": "id": event['body']['id]} +``` The example function above would cause data to be stored in DynamoDB like this: @@ -653,138 +721,136 @@ This utility provides an abstract base class (ABC), so that you can implement yo You can inherit from the `BasePersistenceLayer` class and implement the abstract methods `_get_record`, `_put_record`, `_update_record` and `_delete_record`. 
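+
+As a rough sketch of the required surface (`MyCustomPersistenceLayer` is a hypothetical name; the DynamoDB excerpt below is the reference implementation), each abstract method maps to one idempotency operation:
+
+```python title="Skeleton for a custom persistence layer (sketch)"
+from aws_lambda_powertools.utilities.idempotency import BasePersistenceLayer
+from aws_lambda_powertools.utilities.idempotency.persistence.base import DataRecord
+
+
+class MyCustomPersistenceLayer(BasePersistenceLayer):
+    def _get_record(self, idempotency_key) -> DataRecord:
+        ...  # fetch and return a DataRecord, or raise IdempotencyItemNotFoundError
+
+    def _put_record(self, data_record: DataRecord) -> None:
+        ...  # store the record; a non-expired duplicate should raise IdempotencyItemAlreadyExistsError
+
+    def _update_record(self, data_record: DataRecord) -> None:
+        ...  # persist updated status and response data for an existing record
+
+    def _delete_record(self, data_record: DataRecord) -> None:
+        ...  # remove the record, e.g. when the decorated function raised an exception
+```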
-=== "DynamoDB persistence layer implementation excerpt" - - ```python hl_lines="8-13 57 65 74 96 124" - import datetime - import logging - from typing import Any, Dict, Optional - - import boto3 - from botocore.config import Config - - from aws_lambda_powertools.utilities.idempotency import BasePersistenceLayer - from aws_lambda_powertools.utilities.idempotency.exceptions import ( - IdempotencyItemAlreadyExistsError, - IdempotencyItemNotFoundError, - ) - from aws_lambda_powertools.utilities.idempotency.persistence.base import DataRecord - - logger = logging.getLogger(__name__) - - - class DynamoDBPersistenceLayer(BasePersistenceLayer): - def __init__( - self, - table_name: str, - key_attr: str = "id", - expiry_attr: str = "expiration", - status_attr: str = "status", - data_attr: str = "data", - validation_key_attr: str = "validation", - boto_config: Optional[Config] = None, - boto3_session: Optional[boto3.session.Session] = None, - ): - boto_config = boto_config or Config() - session = boto3_session or boto3.session.Session() - self._ddb_resource = session.resource("dynamodb", config=boto_config) - self.table_name = table_name - self.table = self._ddb_resource.Table(self.table_name) - self.key_attr = key_attr - self.expiry_attr = expiry_attr - self.status_attr = status_attr - self.data_attr = data_attr - self.validation_key_attr = validation_key_attr - super(DynamoDBPersistenceLayer, self).__init__() - - def _item_to_data_record(self, item: Dict[str, Any]) -> DataRecord: - """ - Translate raw item records from DynamoDB to DataRecord - - Parameters - ---------- - item: Dict[str, Union[str, int]] - Item format from dynamodb response - - Returns - ------- - DataRecord - representation of item - - """ - return DataRecord( - idempotency_key=item[self.key_attr], - status=item[self.status_attr], - expiry_timestamp=item[self.expiry_attr], - response_data=item.get(self.data_attr), - payload_hash=item.get(self.validation_key_attr), - ) - - def _get_record(self, idempotency_key) -> DataRecord: - response = self.table.get_item(Key={self.key_attr: idempotency_key}, ConsistentRead=True) - - try: - item = response["Item"] - except KeyError: - raise IdempotencyItemNotFoundError - return self._item_to_data_record(item) - - def _put_record(self, data_record: DataRecord) -> None: - item = { - self.key_attr: data_record.idempotency_key, - self.expiry_attr: data_record.expiry_timestamp, - self.status_attr: data_record.status, - } - - if self.payload_validation_enabled: - item[self.validation_key_attr] = data_record.payload_hash - - now = datetime.datetime.now() - try: - logger.debug(f"Putting record for idempotency key: {data_record.idempotency_key}") - self.table.put_item( - Item=item, - ConditionExpression=f"attribute_not_exists({self.key_attr}) OR {self.expiry_attr} < :now", - ExpressionAttributeValues={":now": int(now.timestamp())}, - ) - except self._ddb_resource.meta.client.exceptions.ConditionalCheckFailedException: - logger.debug(f"Failed to put record for already existing idempotency key: {data_record.idempotency_key}") - raise IdempotencyItemAlreadyExistsError - - def _update_record(self, data_record: DataRecord): - logger.debug(f"Updating record for idempotency key: {data_record.idempotency_key}") - update_expression = "SET #response_data = :response_data, #expiry = :expiry, #status = :status" - expression_attr_values = { - ":expiry": data_record.expiry_timestamp, - ":response_data": data_record.response_data, - ":status": data_record.status, - } - expression_attr_names = { - "#response_data": 
self.data_attr, - "#expiry": self.expiry_attr, - "#status": self.status_attr, - } - - if self.payload_validation_enabled: - update_expression += ", #validation_key = :validation_key" - expression_attr_values[":validation_key"] = data_record.payload_hash - expression_attr_names["#validation_key"] = self.validation_key_attr - - kwargs = { - "Key": {self.key_attr: data_record.idempotency_key}, - "UpdateExpression": update_expression, - "ExpressionAttributeValues": expression_attr_values, - "ExpressionAttributeNames": expression_attr_names, - } - - self.table.update_item(**kwargs) - - def _delete_record(self, data_record: DataRecord) -> None: - logger.debug(f"Deleting record for idempotency key: {data_record.idempotency_key}") - self.table.delete_item(Key={self.key_attr: data_record.idempotency_key},) - ``` +```python hl_lines="8-13 57 65 74 96 124" title="Excerpt DynamoDB Persisntence Layer implementation for reference" +import datetime +import logging +from typing import Any, Dict, Optional + +import boto3 +from botocore.config import Config + +from aws_lambda_powertools.utilities.idempotency import BasePersistenceLayer +from aws_lambda_powertools.utilities.idempotency.exceptions import ( + IdempotencyItemAlreadyExistsError, + IdempotencyItemNotFoundError, +) +from aws_lambda_powertools.utilities.idempotency.persistence.base import DataRecord + +logger = logging.getLogger(__name__) + + +class DynamoDBPersistenceLayer(BasePersistenceLayer): + def __init__( + self, + table_name: str, + key_attr: str = "id", + expiry_attr: str = "expiration", + status_attr: str = "status", + data_attr: str = "data", + validation_key_attr: str = "validation", + boto_config: Optional[Config] = None, + boto3_session: Optional[boto3.session.Session] = None, + ): + boto_config = boto_config or Config() + session = boto3_session or boto3.session.Session() + self._ddb_resource = session.resource("dynamodb", config=boto_config) + self.table_name = table_name + self.table = self._ddb_resource.Table(self.table_name) + self.key_attr = key_attr + self.expiry_attr = expiry_attr + self.status_attr = status_attr + self.data_attr = data_attr + self.validation_key_attr = validation_key_attr + super(DynamoDBPersistenceLayer, self).__init__() + + def _item_to_data_record(self, item: Dict[str, Any]) -> DataRecord: + """ + Translate raw item records from DynamoDB to DataRecord + + Parameters + ---------- + item: Dict[str, Union[str, int]] + Item format from dynamodb response + + Returns + ------- + DataRecord + representation of item + + """ + return DataRecord( + idempotency_key=item[self.key_attr], + status=item[self.status_attr], + expiry_timestamp=item[self.expiry_attr], + response_data=item.get(self.data_attr), + payload_hash=item.get(self.validation_key_attr), + ) + + def _get_record(self, idempotency_key) -> DataRecord: + response = self.table.get_item(Key={self.key_attr: idempotency_key}, ConsistentRead=True) + + try: + item = response["Item"] + except KeyError: + raise IdempotencyItemNotFoundError + return self._item_to_data_record(item) + + def _put_record(self, data_record: DataRecord) -> None: + item = { + self.key_attr: data_record.idempotency_key, + self.expiry_attr: data_record.expiry_timestamp, + self.status_attr: data_record.status, + } + + if self.payload_validation_enabled: + item[self.validation_key_attr] = data_record.payload_hash + + now = datetime.datetime.now() + try: + logger.debug(f"Putting record for idempotency key: {data_record.idempotency_key}") + self.table.put_item( + Item=item, + 
ConditionExpression=f"attribute_not_exists({self.key_attr}) OR {self.expiry_attr} < :now", + ExpressionAttributeValues={":now": int(now.timestamp())}, + ) + except self._ddb_resource.meta.client.exceptions.ConditionalCheckFailedException: + logger.debug(f"Failed to put record for already existing idempotency key: {data_record.idempotency_key}") + raise IdempotencyItemAlreadyExistsError + + def _update_record(self, data_record: DataRecord): + logger.debug(f"Updating record for idempotency key: {data_record.idempotency_key}") + update_expression = "SET #response_data = :response_data, #expiry = :expiry, #status = :status" + expression_attr_values = { + ":expiry": data_record.expiry_timestamp, + ":response_data": data_record.response_data, + ":status": data_record.status, + } + expression_attr_names = { + "#response_data": self.data_attr, + "#expiry": self.expiry_attr, + "#status": self.status_attr, + } + + if self.payload_validation_enabled: + update_expression += ", #validation_key = :validation_key" + expression_attr_values[":validation_key"] = data_record.payload_hash + expression_attr_names["#validation_key"] = self.validation_key_attr + + kwargs = { + "Key": {self.key_attr: data_record.idempotency_key}, + "UpdateExpression": update_expression, + "ExpressionAttributeValues": expression_attr_values, + "ExpressionAttributeNames": expression_attr_names, + } + + self.table.update_item(**kwargs) + + def _delete_record(self, data_record: DataRecord) -> None: + logger.debug(f"Deleting record for idempotency key: {data_record.idempotency_key}") + self.table.delete_item(Key={self.key_attr: data_record.idempotency_key},) +``` -!!! danger +???+ danger Pay attention to the documentation for each - you may need to perform additional checks inside these methods to ensure the idempotency guarantees remain intact. For example, the `_put_record` method needs to raise an exception if a non-expired record already exists in the data store with a matching key. @@ -795,30 +861,29 @@ You can inherit from the `BasePersistenceLayer` class and implement the abstract The idempotency utility can be used with the `validator` decorator. Ensure that idempotency is the innermost decorator. -!!! warning +???+ warning If you use an envelope with the validator, the event received by the idempotency utility will be the unwrapped - event - not the "raw" event Lambda was invoked with. You will need to account for this if you set the - `event_key_jmespath`. + event - not the "raw" event Lambda was invoked with. -=== "app.py" + Make sure to account for this behaviour, if you set the `event_key_jmespath`. 
-    ```python hl_lines="9 10"
-    from aws_lambda_powertools.utilities.validation import validator, envelopes
-    from aws_lambda_powertools.utilities.idempotency import (
-        IdempotencyConfig, DynamoDBPersistenceLayer, idempotent
-    )
+```python hl_lines="9 10" title="Using Idempotency with JSONSchema Validation utility"
+from aws_lambda_powertools.utilities.validation import validator, envelopes
+from aws_lambda_powertools.utilities.idempotency import (
+    IdempotencyConfig, DynamoDBPersistenceLayer, idempotent
+)
 
-    config = IdempotencyConfig(event_key_jmespath="[message, username]")
-    persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable")
+config = IdempotencyConfig(event_key_jmespath="[message, username]")
+persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable")
 
-    @validator(envelope=envelopes.API_GATEWAY_HTTP)
-    @idempotent(config=config, persistence_store=persistence_layer)
-    def lambda_handler(event, context):
-        cause_some_side_effects(event['username')
-        return {"message": event['message'], "statusCode": 200}
-    ```
+@validator(envelope=envelopes.API_GATEWAY_HTTP)
+@idempotent(config=config, persistence_store=persistence_layer)
+def lambda_handler(event, context):
+    cause_some_side_effects(event['username'])
+    return {"message": event['message'], "statusCode": 200}
+```
 
-!!! tip "JMESPath Powertools functions are also available"
+???+ tip "Tip: JMESPath Powertools functions are also available"
     Built-in functions known in the validation utility like `powertools_json`, `powertools_base64`, `powertools_base64_gzip` are also available to use in this utility.
 
diff --git a/docs/utilities/jmespath_functions.md b/docs/utilities/jmespath_functions.md
index 583357a55e2..03b5fce1fd5 100644
--- a/docs/utilities/jmespath_functions.md
+++ b/docs/utilities/jmespath_functions.md
@@ -3,7 +3,8 @@ title: JMESPath Functions
 description: Utility
 ---
 
-!!! tip "JMESPath is a query language for JSON used by AWS CLI, AWS Python SDK, and AWS Lambda Powertools for Python."
+???+ tip
+    JMESPath is a query language for JSON used by AWS CLI, AWS Python SDK, and AWS Lambda Powertools for Python.
 
 Built-in [JMESPath](https://jmespath.org/){target="_blank"} Functions to easily deserialize common encoded JSON payloads in Lambda functions.
 
@@ -18,7 +19,8 @@ You might have events that contains encoded JSON payloads as string, base64, or
 
 Lambda Powertools also have utilities like [validation](validation.md), [idempotency](idempotency.md), or [feature flags](feature_flags.md) where you might need to extract a portion of your data before using them.
 
-!!! info "**Envelope** is the terminology we use for the JMESPath expression to extract your JSON object from your data input"
+???+ info
+    **Envelope** is the terminology we use for the JMESPath expression to extract your JSON object from your data input.
 
 ### Extracting data
 
@@ -107,7 +109,7 @@ Envelope | JMESPath expression
 ### Built-in JMESPath functions
 You can use our built-in JMESPath functions within your expressions to do exactly that to decode JSON Strings, base64, and uncompress gzip data.
 
-!!! info
+???+ info
     We use these for built-in envelopes to easily decode and unwrap events from sources like API Gateway, Kinesis, CloudWatch Logs, etc.
#### powertools_json function @@ -142,31 +144,29 @@ This sample will decode the value within the `data` key into a valid JSON before This sample will decode the value within the `body` key of an API Gateway event into a valid JSON object to ensure the Idempotency utility processes a JSON object instead of a string. -=== "powertools_json_jmespath_function.py" - - ```python hl_lines="8" - import json - from aws_lambda_powertools.utilities.idempotency import ( - IdempotencyConfig, DynamoDBPersistenceLayer, idempotent - ) - - persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") - - config = IdempotencyConfig(event_key_jmespath="powertools_json(body)") - @idempotent(config=config, persistence_store=persistence_layer) - def handler(event:APIGatewayProxyEvent, context): - body = json.loads(event['body']) - payment = create_subscription_payment( - user=body['user'], - product=body['product_id'] - ) - ... - return { - "payment_id": payment.id, - "message": "success", - "statusCode": 200 - } - ``` +```python hl_lines="7" title="Deserializing JSON before using as idempotency key" +import json +from aws_lambda_powertools.utilities.idempotency import ( + IdempotencyConfig, DynamoDBPersistenceLayer, idempotent +) + +persistence_layer = DynamoDBPersistenceLayer(table_name="IdempotencyTable") +config = IdempotencyConfig(event_key_jmespath="powertools_json(body)") + +@idempotent(config=config, persistence_store=persistence_layer) +def handler(event:APIGatewayProxyEvent, context): + body = json.loads(event['body']) + payment = create_subscription_payment( + user=body['user'], + product=body['product_id'] + ) + ... + return { + "payment_id": payment.id, + "message": "success", + "statusCode": 200 + } +``` #### powertools_base64 function @@ -230,7 +230,7 @@ This sample will decompress and decode base64 data, then use JMESPath pipeline e ### Bring your own JMESPath function -!!! warning +???+ warning This should only be used for advanced use cases where you have special formats not covered by the built-in functions. For special binary formats that you want to decode before applying JSON Schema validation, you can bring your own [JMESPath function](https://github.com/jmespath/jmespath.py#custom-functions){target="_blank"} and any additional option via `jmespath_options` param. diff --git a/docs/utilities/middleware_factory.md b/docs/utilities/middleware_factory.md index 253bf6157c3..6133fb3c8af 100644 --- a/docs/utilities/middleware_factory.md +++ b/docs/utilities/middleware_factory.md @@ -18,44 +18,40 @@ You can create your own middleware using `lambda_handler_decorator`. The decorat * **event** - Lambda function invocation event * **context** - Lambda function context object -=== "app.py" +```python hl_lines="3-4 10" title="Creating your own middleware for before/after logic" +from aws_lambda_powertools.middleware_factory import lambda_handler_decorator - ```python hl_lines="3-4 10" - from aws_lambda_powertools.middleware_factory import lambda_handler_decorator +@lambda_handler_decorator +def middleware_before_after(handler, event, context): + # logic_before_handler_execution() + response = handler(event, context) + # logic_after_handler_execution() + return response - @lambda_handler_decorator - def middleware_before_after(handler, event, context): - # logic_before_handler_execution() - response = handler(event, context) - # logic_after_handler_execution() - return response - - @middleware_before_after - def lambda_handler(event, context): - ... 
-    ```
+@middleware_before_after
+def lambda_handler(event, context):
+    ...
+```
 
 ## Middleware with params
 
 You can also have your own keyword arguments after the mandatory arguments.
 
-=== "app.py"
-
-    ```python hl_lines="2 12"
-    @lambda_handler_decorator
-    def obfuscate_sensitive_data(handler, event, context, fields: List = None):
-        # Obfuscate email before calling Lambda handler
-        if fields:
-            for field in fields:
-                if field in event:
-                    event[field] = obfuscate(event[field])
+```python hl_lines="2 12" title="Accepting arbitrary keyword arguments"
+@lambda_handler_decorator
+def obfuscate_sensitive_data(handler, event, context, fields: List = None):
+    # Obfuscate email before calling Lambda handler
+    if fields:
+        for field in fields:
+            if field in event:
+                event[field] = obfuscate(event[field])
 
-        return handler(event, context)
+    return handler(event, context)
 
-    @obfuscate_sensitive_data(fields=["email"])
-    def lambda_handler(event, context):
-        ...
-    ```
+@obfuscate_sensitive_data(fields=["email"])
+def lambda_handler(event, context):
+    ...
+```
 
 ## Tracing middleware execution
 
@@ -63,37 +59,33 @@ If you are making use of [Tracer](../core/tracer.md), you can trace the executio
 
 This makes use of an existing Tracer instance that you may have initialized anywhere in your code.
 
-=== "trace_middleware_execution.py"
+```python hl_lines="3" title="Tracing custom middlewares with Tracer"
+from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
 
-    ```python hl_lines="3"
-    from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
+@lambda_handler_decorator(trace_execution=True)
+def my_middleware(handler, event, context):
+    return handler(event, context)
 
-    @lambda_handler_decorator(trace_execution=True)
-    def my_middleware(handler, event, context):
-        return handler(event, context)
-
-    @my_middleware
-    def lambda_handler(event, context):
-        ...
-    ```
+@my_middleware
+def lambda_handler(event, context):
+    ...
+```
 
 When executed, your middleware name will [appear in AWS X-Ray Trace details as](../core/tracer.md) `## middleware_name`.
 
 For advanced use cases, you can instantiate [Tracer](../core/tracer.md) inside your middleware, and add annotations as well as metadata for additional operational insights.
 
-=== "app.py"
-
-    ```python hl_lines="6-8"
-    from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
-    from aws_lambda_powertools import Tracer
+```python hl_lines="6-8" title="Add custom tracing insights before/after in your middleware"
+from aws_lambda_powertools.middleware_factory import lambda_handler_decorator
+from aws_lambda_powertools import Tracer
 
-    @lambda_handler_decorator(trace_execution=True)
-    def middleware_name(handler, event, context):
-        tracer = Tracer() # Takes a copy of an existing tracer instance
-        tracer.add_annotation...
-        tracer.add_metadata...
-        return handler(event, context)
-    ```
+@lambda_handler_decorator(trace_execution=True)
+def middleware_name(handler, event, context):
+    # tracer = Tracer() # Takes a copy of an existing tracer instance
+    # tracer.add_annotation...
+    # tracer.add_metadata...
+    return handler(event, context)
+```
 
 ## Tips
 
@@ -101,13 +93,3 @@ For advanced use cases, you can instantiate [Tracer](../core/tracer.md) inside y
 * When nesting multiple middlewares, always return the handler with event and context, or response
 * Keep in mind [Python decorators execution order](https://realpython.com/primer-on-python-decorators/#nesting-decorators){target="_blank"}. 
Lambda handler is actually called once (top-down) * Async middlewares are not supported - -## Testing your code - -When unit testing middlewares with `trace_execution` option enabled, use `POWERTOOLS_TRACE_DISABLED` env var to safely disable Tracer. - -=== "shell" - - ```bash - POWERTOOLS_TRACE_DISABLED=1 python -m pytest - ``` diff --git a/docs/utilities/parameters.md b/docs/utilities/parameters.md index 51fd0196abd..395f24b5a76 100644 --- a/docs/utilities/parameters.md +++ b/docs/utilities/parameters.md @@ -21,7 +21,8 @@ By default, we fetch parameters from System Manager Parameter Store, secrets fro This utility requires additional permissions to work as expected. -!!! note "Different parameter providers require different permissions" +???+ note + Different parameter providers require different permissions. Provider | Function/Method | IAM Permission ------------------------------------------------- | ------------------------------------------------- | --------------------------------------------------------------------------------- @@ -38,121 +39,111 @@ You can retrieve a single parameter using `get_parameter` high-level function. For multiple parameters, you can use `get_parameters` and pass a path to retrieve them recursively. -=== "ssm_parameter_store.py" +```python hl_lines="1 5 9" title="Fetching multiple parameters recursively" +from aws_lambda_powertools.utilities import parameters - ```python hl_lines="1 5 9" - from aws_lambda_powertools.utilities import parameters +def handler(event, context): + # Retrieve a single parameter + value = parameters.get_parameter("/my/parameter") - def handler(event, context): - # Retrieve a single parameter - value = parameters.get_parameter("/my/parameter") - - # Retrieve multiple parameters from a path prefix recursively - # This returns a dict with the parameter name as key - values = parameters.get_parameters("/my/path/prefix") - for k, v in values.items(): - print(f"{k}: {v}") - ``` + # Retrieve multiple parameters from a path prefix recursively + # This returns a dict with the parameter name as key + values = parameters.get_parameters("/my/path/prefix") + for k, v in values.items(): + print(f"{k}: {v}") +``` ### Fetching secrets You can fetch secrets stored in Secrets Manager using `get_secrets`. -=== "secrets_manager.py" +```python hl_lines="1 5" title="Fetching secrets" +from aws_lambda_powertools.utilities import parameters - ```python hl_lines="1 5" - from aws_lambda_powertools.utilities import parameters - - def handler(event, context): - # Retrieve a single secret - value = parameters.get_secret("my-secret") - ``` +def handler(event, context): + # Retrieve a single secret + value = parameters.get_secret("my-secret") +``` ### Fetching app configurations -> New in 1.10.0 - You can fetch application configurations in AWS AppConfig using `get_app_config`. The following will retrieve the latest version and store it in the cache. 
-=== "appconfig.py" +```python hl_lines="1 5" title="Fetching latest config from AppConfig" +from aws_lambda_powertools.utilities import parameters - ```python hl_lines="1 5" - from aws_lambda_powertools.utilities import parameters - - def handler(event, context): - # Retrieve a single configuration, latest version - value: bytes = parameters.get_app_config(name="my_configuration", environment="my_env", application="my_app") - ``` +def handler(event, context): + # Retrieve a single configuration, latest version + value: bytes = parameters.get_app_config(name="my_configuration", environment="my_env", application="my_app") +``` ## Advanced ### Adjusting cache TTL +???+ tip + `max_age` parameter is also available in high level functions like `get_parameter`, `get_secret`, etc. + By default, we cache parameters retrieved in-memory for 5 seconds. You can adjust how long we should keep values in cache by using the param `max_age`, when using `get()` or `get_multiple()` methods across all providers. -=== "app.py" +```python hl_lines="9" title="Caching parameter(s) value in memory for longer than 5 seconds" +from aws_lambda_powertools.utilities import parameters +from botocore.config import Config - ```python hl_lines="9" - from aws_lambda_powertools.utilities import parameters - from botocore.config import Config +config = Config(region_name="us-west-1") +ssm_provider = parameters.SSMProvider(config=config) - config = Config(region_name="us-west-1") - ssm_provider = parameters.SSMProvider(config=config) +def handler(event, context): + # Retrieve a single parameter + value = ssm_provider.get("/my/parameter", max_age=60) # 1 minute - def handler(event, context): - # Retrieve a single parameter - value = ssm_provider.get("/my/parameter", max_age=60) # 1 minute - - # Retrieve multiple parameters from a path prefix - values = ssm_provider.get_multiple("/my/path/prefix") - for k, v in values.items(): - print(f"{k}: {v}") - ``` + # Retrieve multiple parameters from a path prefix + values = ssm_provider.get_multiple("/my/path/prefix", max_age=60) + for k, v in values.items(): + print(f"{k}: {v}") +``` ### Always fetching the latest If you'd like to always ensure you fetch the latest parameter from the store regardless if already available in cache, use `force_fetch` param. -=== "app.py" - - ```python hl_lines="5" - from aws_lambda_powertools.utilities import parameters +```python hl_lines="5" title="Forcefully fetching the latest parameter whether TTL has expired or not" +from aws_lambda_powertools.utilities import parameters - def handler(event, context): - # Retrieve a single parameter - value = parameters.get_parameter("/my/parameter", force_fetch=True) - ``` +def handler(event, context): + # Retrieve a single parameter + value = parameters.get_parameter("/my/parameter", force_fetch=True) +``` ### Built-in provider class For greater flexibility such as configuring the underlying SDK client used by built-in providers, you can use their respective Provider Classes directly. -!!! tip "This can be used to retrieve values from other regions, change the retry behavior, etc." +???+ tip + This can be used to retrieve values from other regions, change the retry behavior, etc. 
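+
+For instance, here is a minimal sketch of tuning the SDK retry behavior through a botocore `Config` object before handing it to a provider; the retry values below are illustrative only:
+
+```python hl_lines="5" title="Adjusting the SDK retry behavior for a built-in provider"
+from aws_lambda_powertools.utilities import parameters
+from botocore.config import Config
+
+# Illustrative values: adaptive retry mode with up to 10 attempts
+config = Config(retries={"max_attempts": 10, "mode": "adaptive"})
+ssm_provider = parameters.SSMProvider(config=config)
+
+def handler(event, context):
+    value = ssm_provider.get("/my/parameter")
+```
+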
#### SSMProvider -=== "ssm_parameter_store.py" - - ```python hl_lines="5 9 12" - from aws_lambda_powertools.utilities import parameters - from botocore.config import Config +```python hl_lines="5 9 12" title="Example with SSMProvider for further extensibility" +from aws_lambda_powertools.utilities import parameters +from botocore.config import Config - config = Config(region_name="us-west-1") - ssm_provider = parameters.SSMProvider(config=config) +config = Config(region_name="us-west-1") +ssm_provider = parameters.SSMProvider(config=config) # or boto3_session=boto3.Session() - def handler(event, context): - # Retrieve a single parameter - value = ssm_provider.get("/my/parameter") +def handler(event, context): + # Retrieve a single parameter + value = ssm_provider.get("/my/parameter") - # Retrieve multiple parameters from a path prefix - values = ssm_provider.get_multiple("/my/path/prefix") - for k, v in values.items(): - print(f"{k}: {v}") - ``` + # Retrieve multiple parameters from a path prefix + values = ssm_provider.get_multiple("/my/path/prefix") + for k, v in values.items(): + print(f"{k}: {v}") +``` The AWS Systems Manager Parameter Store provider supports two additional arguments for the `get()` and `get_multiple()` methods: @@ -161,118 +152,114 @@ The AWS Systems Manager Parameter Store provider supports two additional argumen | **decrypt** | `False` | Will automatically decrypt the parameter. | **recursive** | `True` | For `get_multiple()` only, will fetch all parameter values recursively based on a path prefix. -> **Example** - -=== "ssm_parameter_store.py" +```python hl_lines="6 8" title="Example with get() and get_multiple()" +from aws_lambda_powertools.utilities import parameters - ```python hl_lines="6 8" - from aws_lambda_powertools.utilities import parameters - - ssm_provider = parameters.SSMProvider() +ssm_provider = parameters.SSMProvider() - def handler(event, context): - decrypted_value = ssm_provider.get("/my/encrypted/parameter", decrypt=True) +def handler(event, context): + decrypted_value = ssm_provider.get("/my/encrypted/parameter", decrypt=True) - no_recursive_values = ssm_provider.get_multiple("/my/path/prefix", recursive=False) - ``` + no_recursive_values = ssm_provider.get_multiple("/my/path/prefix", recursive=False) +``` #### SecretsProvider -=== "secrets_manager.py" +```python hl_lines="5 9" title="Example with SecretsProvider for further extensibility" +from aws_lambda_powertools.utilities import parameters +from botocore.config import Config - ```python hl_lines="5 9" - from aws_lambda_powertools.utilities import parameters - from botocore.config import Config +config = Config(region_name="us-west-1") +secrets_provider = parameters.SecretsProvider(config=config) - config = Config(region_name="us-west-1") - secrets_provider = parameters.SecretsProvider(config=config) - - def handler(event, context): - # Retrieve a single secret - value = secrets_provider.get("my-secret") - ``` +def handler(event, context): + # Retrieve a single secret + value = secrets_provider.get("my-secret") +``` #### DynamoDBProvider The DynamoDB Provider does not have any high-level functions, as it needs to know the name of the DynamoDB table containing the parameters. 
-**Local testing with DynamoDB Local**
+**DynamoDB table structure for single parameters**
 
-You can initialize the DynamoDB provider pointing to [DynamoDB Local](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html) using **`endpoint_url`** parameter:
+For single parameters, you must use `id` as the [partition key](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.CoreComponents.html#HowItWorks.CoreComponents.PrimaryKey) for that table.
 
-=== "dynamodb_local.py"
+???+ example
 
-    ```python hl_lines="3"
-    from aws_lambda_powertools.utilities import parameters
+    DynamoDB table with `id` as partition key and `value` as attribute
 
-    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table", endpoint_url="http://localhost:8000")
-    ```
-
-**DynamoDB table structure for single parameters**
+    | id | value |
+    |--------------|----------|
+    | my-parameter | my-value |
 
-For single parameters, you must use `id` as the [partition key](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.CoreComponents.html#HowItWorks.CoreComponents.PrimaryKey) for that table.
+With this table, `dynamodb_provider.get("my-parameter")` will return `my-value`.
 
-| id | value |
-|--------------|----------|
-| my-parameter | my-value |
+=== "app.py"
+    ```python hl_lines="3 7"
+    from aws_lambda_powertools.utilities import parameters
 
-
-> **Example**
+    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table")
 
-=== "app.py"
-    With this table, the return value of `dynamodb_provider.get("my-param")` call will be `my-value`.
+    def handler(event, context):
+        # Retrieve a value from DynamoDB
+        value = dynamodb_provider.get("my-parameter")
+    ```
 
-    ```python hl_lines="3 7"
-    from aws_lambda_powertools.utilities import parameters
+=== "DynamoDB Local example"
+    You can initialize the DynamoDB provider pointing to [DynamoDB Local](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/DynamoDBLocal.html) using `endpoint_url` parameter:
 
-    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table")
+    ```python hl_lines="3"
+    from aws_lambda_powertools.utilities import parameters
 
-    def handler(event, context):
-        # Retrieve a value from DynamoDB
-        value = dynamodb_provider.get("my-parameter")
-    ```
+    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table", endpoint_url="http://localhost:8000")
+    ```
 
 **DynamoDB table structure for multiple values parameters**
 
-If you want to be able to retrieve multiple parameters at once sharing the same `id`, your table needs to contain a sort key name `sk`.
+You can retrieve multiple parameters sharing the same `id` by having a sort key named `sk`. 
-For example, if you want to retrieve multiple parameters having `my-hash-key` as ID:
+???+ example
 
-| id | sk | value |
-|-------------|---------|------------|
-| my-hash-key | param-a | my-value-a |
-| my-hash-key | param-b | my-value-b |
-| my-hash-key | param-c | my-value-c |
+    DynamoDB table with `id` as partition key, `sk` as sort key, and `value` as attribute
 
-With this table, the return of `dynamodb_provider.get_multiple("my-hash-key")` call will be a dictionary like:
+    | id | sk | value |
+    |-------------|---------|------------|
+    | my-hash-key | param-a | my-value-a |
+    | my-hash-key | param-b | my-value-b |
+    | my-hash-key | param-c | my-value-c |
 
-```json
-{
-    "param-a": "my-value-a",
-    "param-b": "my-value-b",
-    "param-c": "my-value-c"
-}
-```
+With this table, `dynamodb_provider.get_multiple("my-hash-key")` will return a dictionary response in the shape of `sk:value`.
 
-> **Example**
+=== "app.py"
+    ```python hl_lines="3 8"
+    from aws_lambda_powertools.utilities import parameters
 
-=== "app_multiple_parameters.py"
+    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table")
 
-    ```python hl_lines="3 8"
-    from aws_lambda_powertools.utilities import parameters
+    def handler(event, context):
+        # Retrieve multiple values by performing a Query on the DynamoDB table
+        # This returns a dict with the sort key attribute as dict key.
+        parameters = dynamodb_provider.get_multiple("my-hash-key")
+        for k, v in parameters.items():
+            # k: param-a
+            # v: "my-value-a"
+            print(f"{k}: {v}")
+    ```
 
-    dynamodb_provider = parameters.DynamoDBProvider(table_name="my-table")
+=== "parameters dict response"
 
-    def handler(event, context):
-        # Retrieve multiple values by performing a Query on the DynamoDB table
-        # This returns a dict with the sort key attribute as dict key.
-        values = dynamodb_provider.get_multiple("my-hash-key")
-        for k, v in values.items():
-            print(f"{k}: {v}")
-    ```
+    ```json
+    {
+        "param-a": "my-value-a",
+        "param-b": "my-value-b",
+        "param-c": "my-value-c"
+    }
+    ```
 
-**Additional arguments**
+**Customizing DynamoDBProvider**
 
-The DynamoDB provider supports four additional arguments at initialization. These can be used if you require a custom table structure:
+DynamoDB provider can be customized at initialization to match your table structure:
 
 | Parameter | Mandatory | Default | Description |
 |----------------|-----------|---------|-------------|
@@ -281,39 +268,33 @@ The DynamoDB provider supports four additional arguments at initialization. Thes
 | **sort_attr** | No | `sk` | Range key for the DynamoDB table. You don't need to set this if you don't use the `get_multiple()` method.
 | **value_attr** | No | `value` | Name of the attribute containing the parameter value. 
-> **Example**
+```python hl_lines="3-8" title="Customizing DynamoDBProvider to suit your table design"
+from aws_lambda_powertools.utilities import parameters
 
-=== "app.py"
-
-    ```python hl_lines="3-8"
-    from aws_lambda_powertools.utilities import parameters
+dynamodb_provider = parameters.DynamoDBProvider(
+    table_name="my-table",
+    key_attr="MyKeyAttr",
+    sort_attr="MySortAttr",
+    value_attr="MyValueAttr"
+)
 
-    dynamodb_provider = parameters.DynamoDBProvider(
-        table_name="my-table",
-        key_attr="MyKeyAttr",
-        sort_attr="MySortAttr",
-        value_attr="MyvalueAttr"
-    )
-
-    def handler(event, context):
-        value = dynamodb_provider.get("my-parameter")
-    ```
+def handler(event, context):
+    value = dynamodb_provider.get("my-parameter")
+```
 
 #### AppConfigProvider
 
-=== "app.py"
-
-    ```python hl_lines="5 9"
-    from aws_lambda_powertools.utilities import parameters
-    from botocore.config import Config
+```python hl_lines="5 9" title="Using AppConfigProvider"
+from aws_lambda_powertools.utilities import parameters
+from botocore.config import Config
 
-    config = Config(region_name="us-west-1")
-    appconf_provider = parameters.AppConfigProvider(environment="my_env", application="my_app", config=config)
+config = Config(region_name="us-west-1")
+appconf_provider = parameters.AppConfigProvider(environment="my_env", application="my_app", config=config)
 
-    def handler(event, context):
-        # Retrieve a single secret
-        value: bytes = appconf_provider.get("my_conf")
-    ```
+def handler(event, context):
+    # Retrieve a single configuration
+    value: bytes = appconf_provider.get("my_conf")
+```
 
 ### Create your own provider
 
@@ -323,67 +304,65 @@ All transformation and caching logic is handled by the `get()` and `get_multiple
 
 Here is an example implementation using S3 as a custom parameter store:
 
-=== "custom_provider.py"
+```python hl_lines="4 7 18 28" title="Creating an S3 Provider to fetch parameters"
+import copy
+from typing import Dict
 
-    ```python hl_lines="3 6 17 27"
-    import copy
+from aws_lambda_powertools.utilities.parameters import BaseProvider
+import boto3
 
-    from aws_lambda_powertools.utilities import BaseProvider
-    import boto3
+class S3Provider(BaseProvider):
+    bucket_name = None
+    client = None
 
-    class S3Provider(BaseProvider):
-        bucket_name = None
-        client = None
+    def __init__(self, bucket_name: str):
+        # Initialize the client to your custom parameter store
+        # E.g.:
 
-        def __init__(self, bucket_name: str):
-            # Initialize the client to your custom parameter store
-            # E.g.:
+        self.bucket_name = bucket_name
+        self.client = boto3.client("s3")
 
-            self.bucket_name = bucket_name
-            self.client = boto3.client("s3")
+    def _get(self, name: str, **sdk_options) -> str:
+        # Retrieve a single value
+        # E.g.:
 
-        def _get(self, name: str, **sdk_options) -> str:
-            # Retrieve a single value
-            # E.g.:
+        sdk_options["Bucket"] = self.bucket_name
+        sdk_options["Key"] = name
 
-            sdk_options["Bucket"] = self.bucket_name
-            sdk_options["Key"] = name
+        response = self.client.get_object(**sdk_options)
+        return response["Body"].read().decode()
 
-            response = self.client.get_object(**sdk_options)
-            return
+    def _get_multiple(self, path: str, **sdk_options) -> Dict[str, str]:
+        # Retrieve multiple values
+        # E.g.:
 
-        def _get_multiple(self, path: str, **sdk_options) -> Dict[str, str]:
-            # Retrieve multiple values
-            # E.g.:
+        list_sdk_options = copy.deepcopy(sdk_options)
 
-            list_sdk_options = copy.deepcopy(sdk_options)
+        list_sdk_options["Bucket"] = self.bucket_name
+        list_sdk_options["Prefix"] = path
 
-            list_sdk_options["Bucket"] = self.bucket_name
-            list_sdk_options["Prefix"] = path
 
-            list_response = 
self.client.list_objects_v2(**list_sdk_options) - list_response = self.client.list_objects_v2(**list_sdk_options) + parameters = {} - parameters = {} + for obj in list_response.get("Contents", []): + get_sdk_options = copy.deepcopy(sdk_options) - for obj in list_response.get("Contents", []): - get_sdk_options = copy.deepcopy(sdk_options) + get_sdk_options["Bucket"] = self.bucket_name + get_sdk_options["Key"] = obj["Key"] - get_sdk_options["Bucket"] = self.bucket_name - get_sdk_options["Key"] = obj["Key"] + get_response = self.client.get_object(**get_sdk_options) - get_response = self.client.get_object(**get_sdk_options) + parameters[obj["Key"]] = get_response["Body"].read().decode() - parameters[obj["Key"]] = get_response["Body"].read().decode() - - return parameters - - ``` + return parameters +``` ### Deserializing values with transform parameter For parameters stored in JSON or Base64 format, you can use the `transform` argument for deserialization. -!!! info "The `transform` argument is available across all providers, including the high level functions" +???+ info + The `transform` argument is available across all providers, including the high level functions. === "High level functions" @@ -417,25 +396,26 @@ You can override this by setting the `raise_on_transform_error` argument to `Tru For example, if you have three parameters, */param/a*, */param/b* and */param/c*, but */param/c* is malformed: -=== "partial_failures.py" - - ```python hl_lines="9 14-15" - from aws_lambda_powertools.utilities import parameters - - ssm_provider = parameters.SSMProvider() - - def handler(event, context): - # This will display: - # /param/a: [some value] - # /param/b: [some value] - # /param/c: None - values = ssm_provider.get_multiple("/param", transform="json") - for k, v in values.items(): - print(f"{k}: {v}") - - # This will raise a TransformParameterError exception - values = ssm_provider.get_multiple("/param", transform="json", raise_on_transform_error=True) - ``` +```python hl_lines="9 16" title="Raising TransformParameterError at first malformed parameter" +from aws_lambda_powertools.utilities import parameters + +ssm_provider = parameters.SSMProvider() + +def handler(event, context): + # This will display: + # /param/a: [some value] + # /param/b: [some value] + # /param/c: None + values = ssm_provider.get_multiple("/param", transform="json") + for k, v in values.items(): + print(f"{k}: {v}") + + try: + # This will raise a TransformParameterError exception + values = ssm_provider.get_multiple("/param", transform="json", raise_on_transform_error=True) + except parameters.exceptions.TransformParameterError: + ... +``` #### Auto-transform values on suffix @@ -443,18 +423,17 @@ If you use `transform` with `get_multiple()`, you might want to retrieve and tra You can do this with a single request by using `transform="auto"`. This will instruct any Parameter to to infer its type based on the suffix and transform it accordingly. -!!! info "`transform="auto"` feature is available across all providers, including the high level functions" +???+ info + `transform="auto"` feature is available across all providers, including the high level functions. 
-=== "transform_auto.py" +```python hl_lines="6" title="Deserializing parameter values based on their suffix" +from aws_lambda_powertools.utilities import parameters - ```python hl_lines="6" - from aws_lambda_powertools.utilities import parameters +ssm_provider = parameters.SSMProvider() - ssm_provider = parameters.SSMProvider() - - def handler(event, context): - values = ssm_provider.get_multiple("/param", transform="auto") - ``` +def handler(event, context): + values = ssm_provider.get_multiple("/param", transform="auto") +``` For example, if you have two parameters with the following suffixes `.json` and `.binary`: @@ -476,17 +455,15 @@ The return of `ssm_provider.get_multiple("/param", transform="auto")` call will You can use arbitrary keyword arguments to pass it directly to the underlying SDK method. -=== "ssm_parameter_store.py" - - ```python hl_lines="7" - from aws_lambda_powertools.utilities import parameters +```python hl_lines="8" title="" +from aws_lambda_powertools.utilities import parameters - secrets_provider = parameters.SecretsProvider() +secrets_provider = parameters.SecretsProvider() - def handler(event, context): - # The 'VersionId' argument will be passed to the underlying get_secret_value() call. - value = secrets_provider.get("my-secret", VersionId="e62ec170-6b01-48c7-94f3-d7497851a8d2") - ``` +def handler(event, context): + # The 'VersionId' argument will be passed to the underlying get_secret_value() call. + value = secrets_provider.get("my-secret", VersionId="e62ec170-6b01-48c7-94f3-d7497851a8d2") +``` Here is the mapping between this utility's functions and methods and the underlying SDK: @@ -507,34 +484,34 @@ Here is the mapping between this utility's functions and methods and the underly The **`config`** and **`boto3_session`** parameters enable you to pass in a custom [botocore config object](https://botocore.amazonaws.com/v1/documentation/api/latest/reference/config.html) or a custom [boto3 session](https://boto3.amazonaws.com/v1/documentation/api/latest/reference/core/session.html) when constructing any of the built-in provider classes. -> **Example** - +???+ tip + You can use a custom session for retrieving parameters cross-account/region and for snapshot testing. === "Custom session" - ```python hl_lines="2 4 5" - from aws_lambda_powertools.utilities import parameters - import boto3 + ```python hl_lines="2 4 5" + from aws_lambda_powertools.utilities import parameters + import boto3 - boto3_session = boto3.session.Session() - ssm_provider = parameters.SSMProvider(boto3_session=boto3_session) + boto3_session = boto3.session.Session() + ssm_provider = parameters.SSMProvider(boto3_session=boto3_session) - def handler(event, context): - # Retrieve a single parameter - value = ssm_provider.get("/my/parameter") - ... - ``` + def handler(event, context): + # Retrieve a single parameter + value = ssm_provider.get("/my/parameter") + ... + ``` === "Custom config" - ```python hl_lines="2 4 5" - from aws_lambda_powertools.utilities import parameters - from botocore.config import Config + ```python hl_lines="2 4 5" + from aws_lambda_powertools.utilities import parameters + from botocore.config import Config - boto_config = Config() - ssm_provider = parameters.SSMProvider(config=boto_config) + boto_config = Config() + ssm_provider = parameters.SSMProvider(config=boto_config) - def handler(event, context): - # Retrieve a single parameter - value = ssm_provider.get("/my/parameter") - ... 
- ``` + def handler(event, context): + # Retrieve a single parameter + value = ssm_provider.get("/my/parameter") + ... + ``` diff --git a/docs/utilities/parser.md b/docs/utilities/parser.md index 7c9af95896f..37f86cbf4f5 100644 --- a/docs/utilities/parser.md +++ b/docs/utilities/parser.md @@ -13,7 +13,7 @@ This utility provides data parsing and deep validation using [Pydantic](https:// **Extra dependency** -!!! warning +???+ warning This will increase the overall package size by approximately 75MB due to Pydantic dependency. Install parser's extra dependencies using **`pip install aws-lambda-powertools[pydantic]`**. @@ -22,23 +22,21 @@ Install parser's extra dependencies using **`pip install aws-lambda-powertools[p You can define models to parse incoming events by inheriting from `BaseModel`. -=== "hello_world_model.py" +```python title="Defining an Order data model" +from aws_lambda_powertools.utilities.parser import BaseModel +from typing import List, Optional - ```python - from aws_lambda_powertools.utilities.parser import BaseModel - from typing import List, Optional - - class OrderItem(BaseModel): - id: int - quantity: int - description: str - - class Order(BaseModel): - id: int - description: str - items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing - ``` +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing +``` These are simply Python classes that inherit from BaseModel. **Parser** enforces type hints declared in your model at runtime. @@ -52,100 +50,97 @@ Use the decorator for fail fast scenarios where you want your Lambda function to `event_parser` decorator will throw a `ValidationError` if your event cannot be parsed according to the model. -> NOTE: **This decorator will replace the `event` object with the parsed model if successful**. This means you might be careful when nesting other decorators that expect `event` to be a `dict`. - -=== "event_parser_decorator.py" - - ```python hl_lines="18" - from aws_lambda_powertools.utilities.parser import event_parser, BaseModel - from aws_lambda_powertools.utilities.typing import LambdaContext - from typing import List, Optional - - import json - - class OrderItem(BaseModel): - id: int - quantity: int - description: str - - class Order(BaseModel): - id: int - description: str - items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing - - - @event_parser(model=Order) - def handler(event: Order, context: LambdaContext): - print(event.id) - print(event.description) - print(event.items) - - order_items = [item for item in event.items] - ... - - payload = { - "id": 10876546789, - "description": "My order", - "items": [ - { - "id": 1015938732, - "quantity": 1, - "description": "item xpto" - } - ] - } - - handler(event=payload, context=LambdaContext()) - handler(event=json.dumps(payload), context=LambdaContext()) # also works if event is a JSON string - ``` +???+ note + **This decorator will replace the `event` object with the parsed model if successful**. This means you might be careful when nesting other decorators that expect `event` to be a `dict`. 
+ +```python hl_lines="18" title="Parsing and validating upon invocation with event_parser decorator" +from aws_lambda_powertools.utilities.parser import event_parser, BaseModel +from aws_lambda_powertools.utilities.typing import LambdaContext +from typing import List, Optional + +import json + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing + + +@event_parser(model=Order) +def handler(event: Order, context: LambdaContext): + print(event.id) + print(event.description) + print(event.items) + + order_items = [item for item in event.items] + ... + +payload = { + "id": 10876546789, + "description": "My order", + "items": [ + { + "id": 1015938732, + "quantity": 1, + "description": "item xpto" + } + ] +} + +handler(event=payload, context=LambdaContext()) +handler(event=json.dumps(payload), context=LambdaContext()) # also works if event is a JSON string +``` ### parse function Use this standalone function when you want more control over the data validation process, for example returning a 400 error for malformed payloads. -=== "parse_standalone_example.py" - - ```python hl_lines="21 30" - from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError - from typing import List, Optional - - class OrderItem(BaseModel): - id: int - quantity: int - description: str - - class Order(BaseModel): - id: int - description: str - items: List[OrderItem] # nesting models are supported - optional_field: Optional[str] # this field may or may not be available when parsing - - - payload = { - "id": 10876546789, - "description": "My order", - "items": [ - { - # this will cause a validation error - "id": [1015938732], - "quantity": 1, - "description": "item xpto" - } - ] - } - - def my_function(): - try: - parsed_payload: Order = parse(event=payload, model=Order) - # payload dict is now parsed into our model - return parsed_payload.items - except ValidationError: - return { - "status_code": 400, - "message": "Invalid order" - } - ``` +```python hl_lines="21 30" title="Using standalone parse function for more flexibility" +from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError +from typing import List, Optional + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] # nesting models are supported + optional_field: Optional[str] # this field may or may not be available when parsing + + +payload = { + "id": 10876546789, + "description": "My order", + "items": [ + { + # this will cause a validation error + "id": [1015938732], + "quantity": 1, + "description": "item xpto" + } + ] +} + +def my_function(): + try: + parsed_payload: Order = parse(event=payload, model=Order) + # payload dict is now parsed into our model + return parsed_payload.items + except ValidationError: + return { + "status_code": 400, + "message": "Invalid order" + } +``` ## Built-in models @@ -170,60 +165,60 @@ Parser comes with the following built-in models: You can extend them to include your own models, and yet have all other known fields parsed along the way. 
-**EventBridge example** - -=== "extending_builtin_models.py" - - ```python hl_lines="16-17 28 41" - from aws_lambda_powertools.utilities.parser import parse, BaseModel - from aws_lambda_powertools.utilities.parser.models import EventBridgeModel - - from typing import List, Optional - - class OrderItem(BaseModel): - id: int - quantity: int - description: str - - class Order(BaseModel): - id: int - description: str - items: List[OrderItem] - - class OrderEventModel(EventBridgeModel): - detail: Order - - payload = { - "version": "0", - "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", - "detail-type": "OrderPurchased", - "source": "OrderService", - "account": "111122223333", - "time": "2020-10-22T18:43:48Z", - "region": "us-west-1", - "resources": ["some_additional"], - "detail": { - "id": 10876546789, - "description": "My order", - "items": [ - { - "id": 1015938732, - "quantity": 1, - "description": "item xpto" - } - ] - } - } - - ret = parse(model=OrderEventModel, event=payload) - - assert ret.source == "OrderService" - assert ret.detail.description == "My order" - assert ret.detail_type == "OrderPurchased" # we rename it to snake_case since detail-type is an invalid name - - for order_item in ret.detail.items: - ... - ``` +???+ tip + For Mypy users, we only allow type override for fields where payload is injected e.g. `detail`, `body`, etc. + + +```python hl_lines="16-17 28 41" title="Extending EventBridge model as an example" +from aws_lambda_powertools.utilities.parser import parse, BaseModel +from aws_lambda_powertools.utilities.parser.models import EventBridgeModel + +from typing import List, Optional + +class OrderItem(BaseModel): + id: int + quantity: int + description: str + +class Order(BaseModel): + id: int + description: str + items: List[OrderItem] + +class OrderEventModel(EventBridgeModel): + detail: Order + +payload = { + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail-type": "OrderPurchased", + "source": "OrderService", + "account": "111122223333", + "time": "2020-10-22T18:43:48Z", + "region": "us-west-1", + "resources": ["some_additional"], + "detail": { + "id": 10876546789, + "description": "My order", + "items": [ + { + "id": 1015938732, + "quantity": 1, + "description": "item xpto" + } + ] + } +} + +ret = parse(model=OrderEventModel, event=payload) + +assert ret.source == "OrderService" +assert ret.detail.description == "My order" +assert ret.detail_type == "OrderPurchased" # we rename it to snake_case since detail-type is an invalid name + +for order_item in ret.detail.items: + ... +``` **What's going on here, you might ask**: @@ -247,43 +242,41 @@ Envelopes can be used via `envelope` parameter available in both `parse` functio Here's an example of parsing a model found in an event coming from EventBridge, where all you want is what's inside the `detail` key. 
-=== "parse_eventbridge_payload.py" - - ```python hl_lines="18-22 25 31" - from aws_lambda_powertools.utilities.parser import event_parser, parse, BaseModel, envelopes - from aws_lambda_powertools.utilities.typing import LambdaContext - - class UserModel(BaseModel): - username: str - password1: str - password2: str - - payload = { - "version": "0", - "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", - "detail-type": "CustomerSignedUp", - "source": "CustomerService", - "account": "111122223333", - "time": "2020-10-22T18:43:48Z", - "region": "us-west-1", - "resources": ["some_additional_"], - "detail": { - "username": "universe", - "password1": "myp@ssword", - "password2": "repeat password" - } - } - - ret = parse(model=UserModel, envelope=envelopes.EventBridgeEnvelope, event=payload) - - # Parsed model only contains our actual model, not the entire EventBridge + Payload parsed - assert ret.password1 == ret.password2 - - # Same behaviour but using our decorator - @event_parser(model=UserModel, envelope=envelopes.EventBridgeEnvelope) - def handler(event: UserModel, context: LambdaContext): - assert event.password1 == event.password2 - ``` +```python hl_lines="18-22 25 31" title="Parsing payload in a given key only using envelope feature" +from aws_lambda_powertools.utilities.parser import event_parser, parse, BaseModel, envelopes +from aws_lambda_powertools.utilities.typing import LambdaContext + +class UserModel(BaseModel): + username: str + password1: str + password2: str + +payload = { + "version": "0", + "id": "6a7e8feb-b491-4cf7-a9f1-bf3703467718", + "detail-type": "CustomerSignedUp", + "source": "CustomerService", + "account": "111122223333", + "time": "2020-10-22T18:43:48Z", + "region": "us-west-1", + "resources": ["some_additional_"], + "detail": { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" + } +} + +ret = parse(model=UserModel, envelope=envelopes.EventBridgeEnvelope, event=payload) + +# Parsed model only contains our actual model, not the entire EventBridge + Payload parsed +assert ret.password1 == ret.password2 + +# Same behaviour but using our decorator +@event_parser(model=UserModel, envelope=envelopes.EventBridgeEnvelope) +def handler(event: UserModel, context: LambdaContext): + assert event.password1 == event.password2 +``` **What's going on here, you might ask**: @@ -314,9 +307,7 @@ You can create your own Envelope model and logic by inheriting from `BaseEnvelop Here's a snippet of how the EventBridge envelope we demonstrated previously is implemented. -**EventBridge Model** - -=== "eventbridge_model.py" +=== "EventBridge Model" ```python from datetime import datetime @@ -337,9 +328,7 @@ Here's a snippet of how the EventBridge envelope we demonstrated previously is i detail: Dict[str, Any] ``` -**EventBridge Envelope** - -=== "eventbridge_envelope.py" +=== "EventBridge Envelope" ```python hl_lines="8 10 25 26" from aws_lambda_powertools.utilities.parser import BaseEnvelope, models @@ -379,7 +368,7 @@ Here's a snippet of how the EventBridge envelope we demonstrated previously is i ## Data model validation -!!! warning +???+ warning This is radically different from the **Validator utility** which validates events against JSON Schema. You can use parser's validator for deep inspection of object values and complex relationships. @@ -398,130 +387,120 @@ Keep the following in mind regardless of which decorator you end up using it: Quick validation to verify whether the field `message` has the value of `hello world`. 
-=== "deep_data_validation.py" - - ```python hl_lines="6" - from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator +```python hl_lines="6" title="Data field validation with validator" +from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator - class HelloWorldModel(BaseModel): - message: str +class HelloWorldModel(BaseModel): + message: str - @validator('message') - def is_hello_world(cls, v): - if v != "hello world": - raise ValueError("Message must be hello world!") - return v + @validator('message') + def is_hello_world(cls, v): + if v != "hello world": + raise ValueError("Message must be hello world!") + return v - parse(model=HelloWorldModel, event={"message": "hello universe"}) - ``` +parse(model=HelloWorldModel, event={"message": "hello universe"}) +``` If you run as-is, you should expect the following error with the message we provided in our exception: -``` +```python title="Sample validation error message" message Message must be hello world! (type=value_error) ``` Alternatively, you can pass `'*'` as an argument for the decorator so that you can validate every value available. -=== "validate_all_field_values.py" - - ```python hl_lines="7" - from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator +```python hl_lines="7" title="Validating all data fields with custom logic" +from aws_lambda_powertools.utilities.parser import parse, BaseModel, validator - class HelloWorldModel(BaseModel): - message: str - sender: str +class HelloWorldModel(BaseModel): + message: str + sender: str - @validator('*') - def has_whitespace(cls, v): - if ' ' not in v: - raise ValueError("Must have whitespace...") + @validator('*') + def has_whitespace(cls, v): + if ' ' not in v: + raise ValueError("Must have whitespace...") - return v + return v - parse(model=HelloWorldModel, event={"message": "hello universe", "sender": "universe"}) - ``` +parse(model=HelloWorldModel, event={"message": "hello universe", "sender": "universe"}) +``` ### validating entire model `root_validator` can help when you have a complex validation mechanism. For example finding whether data has been omitted, comparing field values, etc. -=== "validate_all_field_values.py" +```python title="Comparing and validating multiple fields at once with root_validator" +from aws_lambda_powertools.utilities.parser import parse, BaseModel, root_validator - ```python - from aws_lambda_powertools.utilities.parser import parse, BaseModel, root_validator - - class UserModel(BaseModel): - username: str - password1: str - password2: str - - @root_validator - def check_passwords_match(cls, values): - pw1, pw2 = values.get('password1'), values.get('password2') - if pw1 is not None and pw2 is not None and pw1 != pw2: - raise ValueError('passwords do not match') - return values - - payload = { - "username": "universe", - "password1": "myp@ssword", - "password2": "repeat password" - } - - parse(model=UserModel, event=payload) - ``` +class UserModel(BaseModel): + username: str + password1: str + password2: str + + @root_validator + def check_passwords_match(cls, values): + pw1, pw2 = values.get('password1'), values.get('password2') + if pw1 is not None and pw2 is not None and pw1 != pw2: + raise ValueError('passwords do not match') + return values + +payload = { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" +} -!!! 
info +parse(model=UserModel, event=payload) +``` + +???+ info You can read more about validating list items, reusing validators, validating raw inputs, and a lot more in Pydantic's documentation. ## Advanced use cases -!!! info - **Looking to auto-generate models from JSON, YAML, JSON Schemas, OpenApi, etc?** - +???+ tip "Tip: Looking to auto-generate models from JSON, YAML, JSON Schemas, OpenApi, etc?" Use Koudai Aono's [data model code generation tool for Pydantic](https://github.com/koxudaxi/datamodel-code-generator) There are number of advanced use cases well documented in Pydantic's doc such as creating [immutable models](https://pydantic-docs.helpmanual.io/usage/models/#faux-immutability), [declaring fields with dynamic values](https://pydantic-docs.helpmanual.io/usage/models/#field-with-dynamic-default-value)) e.g. UUID, and [helper functions to parse models from files, str](https://pydantic-docs.helpmanual.io/usage/models/#helper-functions), etc. Two possible unknown use cases are Models and exception' serialization. Models have methods to [export them](https://pydantic-docs.helpmanual.io/usage/exporting_models/) as `dict`, `JSON`, `JSON Schema`, and Validation exceptions can be exported as JSON. -=== "serializing_models_exceptions.py" - - ```python hl_lines="21 28-31" - from aws_lambda_powertools.utilities import Logger - from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError, validator - - logger = Logger(service="user") - - class UserModel(BaseModel): - username: str - password1: str - password2: str - - payload = { - "username": "universe", - "password1": "myp@ssword", - "password2": "repeat password" - } - - def my_function(): - try: - return parse(model=UserModel, event=payload) - except ValidationError as e: - logger.exception(e.json()) - return { - "status_code": 400, - "message": "Invalid username" - } - - User: UserModel = my_function() - user_dict = User.dict() - user_json = User.json() - user_json_schema_as_dict = User.schema() - user_json_schema_as_json = User.schema_json(indent=2) - ``` +```python hl_lines="21 28-31" title="Converting data models in various formats" +from aws_lambda_powertools.utilities import Logger +from aws_lambda_powertools.utilities.parser import parse, BaseModel, ValidationError, validator + +logger = Logger(service="user") + +class UserModel(BaseModel): + username: str + password1: str + password2: str + +payload = { + "username": "universe", + "password1": "myp@ssword", + "password2": "repeat password" +} + +def my_function(): + try: + return parse(model=UserModel, event=payload) + except ValidationError as e: + logger.exception(e.json()) + return { + "status_code": 400, + "message": "Invalid username" + } + +User: UserModel = my_function() +user_dict = User.dict() +user_json = User.json() +user_json_schema_as_dict = User.schema() +user_json_schema_as_json = User.schema_json(indent=2) +``` These can be quite useful when manipulating models that later need to be serialized as inputs for services like DynamoDB, EventBridge, etc. 
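+
+As a quick sketch of that idea, an exported model (such as the `UserModel` above) can be passed straight to the EventBridge `PutEvents` API; the source, detail type and bus name below are illustrative only:
+
+```python title="Publishing a parsed model to EventBridge"
+import boto3
+
+events = boto3.client("events")
+
+def publish_user(user: UserModel):
+    # user.json() returns a JSON string, ready to be used as the event detail
+    events.put_events(
+        Entries=[
+            {
+                "Source": "user-service",      # illustrative source
+                "DetailType": "UserSignedUp",  # illustrative detail type
+                "Detail": user.json(),
+                "EventBusName": "default",
+            }
+        ]
+    )
+```
+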
@@ -539,11 +518,9 @@ We export most common classes, exceptions, and utilities from Pydantic as part o If what's your trying to use isn't available as part of the high level import system, use the following escape hatch mechanism: -=== "escape_hatch.py" - - ```python - from aws_lambda_powertools.utilities.parser.pydantic import - ``` +```python title="Pydantic import escape hatch" +from aws_lambda_powertools.utilities.parser.pydantic import +``` **What is the cold start impact in bringing this additional dependency?** @@ -553,7 +530,8 @@ Artillery load test sample against a [hello world sample](https://github.com/aws **No parser** -> **Uncompressed package size**: 55M, **p99**: 180.3ms +???+ info + **Uncompressed package size**: 55M, **p99**: 180.3ms ``` Summary report @ 14:36:07(+0200) 2020-10-23 @@ -575,7 +553,8 @@ Codes: **With parser** -> **Uncompressed package size**: 128M, **p99**: 193.1ms +???+ info + **Uncompressed package size**: 128M, **p99**: 193.1ms ``` Summary report @ 14:29:23(+0200) 2020-10-23 diff --git a/docs/utilities/typing.md b/docs/utilities/typing.md index 87166520e70..c1b4dbad32b 100644 --- a/docs/utilities/typing.md +++ b/docs/utilities/typing.md @@ -11,13 +11,11 @@ This typing utility provides static typing classes that can be used to ease the The `LambdaContext` typing is typically used in the handler method for the Lambda function. -=== "index.py" +```python hl_lines="4" title="Annotating Lambda context type" +from typing import Any, Dict +from aws_lambda_powertools.utilities.typing import LambdaContext - ```python hl_lines="4" - from typing import Any, Dict - from aws_lambda_powertools.utilities.typing import LambdaContext - - def handler(event: Dict[str, Any], context: LambdaContext) -> Dict[str, Any]: - # Insert business logic - return event - ``` +def handler(event: Dict[str, Any], context: LambdaContext) -> Dict[str, Any]: + # Insert business logic + return event +``` diff --git a/docs/utilities/validation.md b/docs/utilities/validation.md index 73f1e085164..e6ca0841d2d 100644 --- a/docs/utilities/validation.md +++ b/docs/utilities/validation.md @@ -13,7 +13,7 @@ This utility provides JSON Schema validation for events and responses, including ## Getting started -!!! tip "Using JSON Schemas for the first time?" +???+ tip "Tip: Using JSON Schemas for the first time?" Check this [step-by-step tour in the official JSON Schema website](https://json-schema.org/learn/getting-started-step-by-step.html){target="_blank"}. You can validate inbound and outbound events using [`validator` decorator](#validator-decorator). @@ -22,7 +22,7 @@ You can also use the standalone `validate` function, if you want more control ov We support any JSONSchema draft supported by [fastjsonschema](https://horejsek.github.io/python-fastjsonschema/){target="_blank"} library. -!!! warning +???+ warning Both `validator` decorator and `validate` standalone function expects your JSON Schema to be a **dictionary**, not a filename. ### Validator decorator @@ -58,7 +58,7 @@ It will fail fast with `SchemaValidationError` exception if event or response do --8<-- "docs/shared/validation_basic_jsonschema.py" ``` -!!! note +???+ note It's not a requirement to validate both inbound and outbound schemas - You can either use one, or both. ### Validate function @@ -181,21 +181,19 @@ Envelope name | JMESPath expression ### Validating custom formats -!!! 
note "New in 1.10.0" +???+ note JSON Schema DRAFT 7 [has many new built-in formats](https://json-schema.org/understanding-json-schema/reference/string.html#format){target="_blank"} such as date, time, and specifically a regex format which might be a better replacement for a custom format, if you do have control over the schema. JSON Schemas with custom formats like `int64` will fail validation. If you have these, you can pass them using `formats` parameter: -=== "custom_json_schema_type_format.json" - - ```json - { - "lastModifiedTime": { - "format": "int64", - "type": "integer" - } - } - ``` +```json title="custom_json_schema_type_format.json" +{ + "lastModifiedTime": { + "format": "int64", + "type": "integer" + } +} +``` For each format defined in a dictionary key, you must use a regex, or a function that returns a boolean to instruct the validator on how to proceed when encountering that type. @@ -431,5 +429,5 @@ You might have events or responses that contain non-encoded JSON, where you need You can use our built-in [JMESPath functions](/utilities/jmespath_functions) within your expressions to do exactly that to decode JSON Strings, base64, and uncompress gzip data. -!!! info +???+ info We use these for built-in envelopes to easily to decode and unwrap events from sources like Kinesis, CloudWatch Logs, etc. diff --git a/mkdocs.yml b/mkdocs.yml index 54a0fa50a67..218deea586b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -27,6 +27,7 @@ nav: - utilities/idempotency.md - utilities/feature_flags.md - utilities/jmespath_functions.md + - CloudFormation Custom Resources: https://github.com/aws-cloudformation/custom-resource-helper" target="_blank theme: name: material @@ -48,6 +49,7 @@ theme: - navigation.sections - navigation.expand - navigation.top + - navigation.instant icon: repo: fontawesome/brands/github logo: media/aws-logo-light.svg diff --git a/poetry.lock b/poetry.lock index a003d0fb93b..6b71a28bc6f 100644 --- a/poetry.lock +++ b/poetry.lock @@ -205,7 +205,7 @@ python-versions = "*" [[package]] name = "fastjsonschema" -version = "2.15.1" +version = "2.15.2" description = "Fastest Python implementation of JSON schema" category = "main" optional = false @@ -597,21 +597,21 @@ python-versions = ">=3.6" [[package]] name = "mypy" -version = "0.910" +version = "0.930" description = "Optional static typing for Python" category = "dev" optional = false -python-versions = ">=3.5" +python-versions = ">=3.6" [package.dependencies] -mypy-extensions = ">=0.4.3,<0.5.0" -toml = "*" -typed-ast = {version = ">=1.4.0,<1.5.0", markers = "python_version < \"3.8\""} -typing-extensions = ">=3.7.4" +mypy-extensions = ">=0.4.3" +tomli = ">=1.1.0" +typed-ast = {version = ">=1.4.0,<2", markers = "python_version < \"3.8\""} +typing-extensions = ">=3.10" [package.extras] dmypy = ["psutil (>=4.0)"] -python2 = ["typed-ast (>=1.4.0,<1.5.0)"] +python2 = ["typed-ast (>=1.4.0,<2)"] [[package]] name = "mypy-extensions" @@ -1056,7 +1056,7 @@ pydantic = ["pydantic", "email-validator"] [metadata] lock-version = "1.1" python-versions = "^3.6.2" -content-hash = "d003a9b82e3692f6e55a5dde89dae18796d9c5747c9d097a0ec113ecb4e02f02" +content-hash = "364d0964de7151fa587584ef923d44440007f8e46933c038440c79f242e3e3fa" [metadata.files] atomicwrites = [ @@ -1168,8 +1168,8 @@ eradicate = [ {file = "eradicate-2.0.0.tar.gz", hash = "sha256:27434596f2c5314cc9b31410c93d8f7e8885747399773cd088d3adea647a60c8"}, ] fastjsonschema = [ - {file = "fastjsonschema-2.15.1-py3-none-any.whl", hash = 
"sha256:fa2f4bb1e31419c5eb1150f2e0545921712c10c34165b86d33f08f5562ad4b85"}, - {file = "fastjsonschema-2.15.1.tar.gz", hash = "sha256:671f36d225b3493629b5e789428660109528f373cf4b8a22bac6fa2f8191c2d2"}, + {file = "fastjsonschema-2.15.2-py3-none-any.whl", hash = "sha256:5fb095151a88b166e6cda6f527ce83775bf24b3d13e1adb67c690300f3fab0a1"}, + {file = "fastjsonschema-2.15.2.tar.gz", hash = "sha256:2a84755f22eb4c944c83c18d8f40705612376d178d2a5dfe50b0ecca3c11d610"}, ] flake8 = [ {file = "flake8-4.0.1-py2.py3-none-any.whl", hash = "sha256:479b1304f72536a55948cb40a32dce8bb0ffe3501e26eaf292c7e60eb5e0428d"}, @@ -1361,29 +1361,26 @@ mkdocs-material-extensions = [ {file = "mkdocs_material_extensions-1.0.3-py3-none-any.whl", hash = "sha256:a82b70e533ce060b2a5d9eb2bc2e1be201cf61f901f93704b4acf6e3d5983a44"}, ] mypy = [ - {file = "mypy-0.910-cp35-cp35m-macosx_10_9_x86_64.whl", hash = "sha256:a155d80ea6cee511a3694b108c4494a39f42de11ee4e61e72bc424c490e46457"}, - {file = "mypy-0.910-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:b94e4b785e304a04ea0828759172a15add27088520dc7e49ceade7834275bedb"}, - {file = "mypy-0.910-cp35-cp35m-manylinux2010_x86_64.whl", hash = "sha256:088cd9c7904b4ad80bec811053272986611b84221835e079be5bcad029e79dd9"}, - {file = "mypy-0.910-cp35-cp35m-win_amd64.whl", hash = "sha256:adaeee09bfde366d2c13fe6093a7df5df83c9a2ba98638c7d76b010694db760e"}, - {file = "mypy-0.910-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:ecd2c3fe726758037234c93df7e98deb257fd15c24c9180dacf1ef829da5f921"}, - {file = "mypy-0.910-cp36-cp36m-manylinux1_x86_64.whl", hash = "sha256:d9dd839eb0dc1bbe866a288ba3c1afc33a202015d2ad83b31e875b5905a079b6"}, - {file = "mypy-0.910-cp36-cp36m-manylinux2010_x86_64.whl", hash = "sha256:3e382b29f8e0ccf19a2df2b29a167591245df90c0b5a2542249873b5c1d78212"}, - {file = "mypy-0.910-cp36-cp36m-win_amd64.whl", hash = "sha256:53fd2eb27a8ee2892614370896956af2ff61254c275aaee4c230ae771cadd885"}, - {file = "mypy-0.910-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:b6fb13123aeef4a3abbcfd7e71773ff3ff1526a7d3dc538f3929a49b42be03f0"}, - {file = "mypy-0.910-cp37-cp37m-manylinux1_x86_64.whl", hash = "sha256:e4dab234478e3bd3ce83bac4193b2ecd9cf94e720ddd95ce69840273bf44f6de"}, - {file = "mypy-0.910-cp37-cp37m-manylinux2010_x86_64.whl", hash = "sha256:7df1ead20c81371ccd6091fa3e2878559b5c4d4caadaf1a484cf88d93ca06703"}, - {file = "mypy-0.910-cp37-cp37m-win_amd64.whl", hash = "sha256:0aadfb2d3935988ec3815952e44058a3100499f5be5b28c34ac9d79f002a4a9a"}, - {file = "mypy-0.910-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:ec4e0cd079db280b6bdabdc807047ff3e199f334050db5cbb91ba3e959a67504"}, - {file = "mypy-0.910-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:119bed3832d961f3a880787bf621634ba042cb8dc850a7429f643508eeac97b9"}, - {file = "mypy-0.910-cp38-cp38-manylinux2010_x86_64.whl", hash = "sha256:866c41f28cee548475f146aa4d39a51cf3b6a84246969f3759cb3e9c742fc072"}, - {file = "mypy-0.910-cp38-cp38-win_amd64.whl", hash = "sha256:ceb6e0a6e27fb364fb3853389607cf7eb3a126ad335790fa1e14ed02fba50811"}, - {file = "mypy-0.910-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:1a85e280d4d217150ce8cb1a6dddffd14e753a4e0c3cf90baabb32cefa41b59e"}, - {file = "mypy-0.910-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:42c266ced41b65ed40a282c575705325fa7991af370036d3f134518336636f5b"}, - {file = "mypy-0.910-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:3c4b8ca36877fc75339253721f69603a9c7fdb5d4d5a95a1a1b899d8b86a4de2"}, - {file = "mypy-0.910-cp39-cp39-manylinux2010_x86_64.whl", hash = 
"sha256:c0df2d30ed496a08de5daed2a9ea807d07c21ae0ab23acf541ab88c24b26ab97"}, - {file = "mypy-0.910-cp39-cp39-win_amd64.whl", hash = "sha256:c6c2602dffb74867498f86e6129fd52a2770c48b7cd3ece77ada4fa38f94eba8"}, - {file = "mypy-0.910-py3-none-any.whl", hash = "sha256:ef565033fa5a958e62796867b1df10c40263ea9ded87164d67572834e57a174d"}, - {file = "mypy-0.910.tar.gz", hash = "sha256:704098302473cb31a218f1775a873b376b30b4c18229421e9e9dc8916fd16150"}, + {file = "mypy-0.930-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:221cc94dc6a801ccc2be7c0c9fd791c5e08d1fa2c5e1c12dec4eab15b2469871"}, + {file = "mypy-0.930-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:db3a87376a1380f396d465bed462e76ea89f838f4c5e967d68ff6ee34b785c31"}, + {file = "mypy-0.930-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1d2296f35aae9802eeb1327058b550371ee382d71374b3e7d2804035ef0b830b"}, + {file = "mypy-0.930-cp310-cp310-win_amd64.whl", hash = "sha256:959319b9a3cafc33a8185f440a433ba520239c72e733bf91f9efd67b0a8e9b30"}, + {file = "mypy-0.930-cp36-cp36m-macosx_10_9_x86_64.whl", hash = "sha256:45a4dc21c789cfd09b8ccafe114d6de66f0b341ad761338de717192f19397a8c"}, + {file = "mypy-0.930-cp36-cp36m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:1e689e92cdebd87607a041585f1dc7339aa2e8a9f9bad9ba7e6ece619431b20c"}, + {file = "mypy-0.930-cp36-cp36m-win_amd64.whl", hash = "sha256:ed4e0ea066bb12f56b2812a15ff223c57c0a44eca817ceb96b214bb055c7051f"}, + {file = "mypy-0.930-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:a9d8dffefba634b27d650e0de2564379a1a367e2e08d6617d8f89261a3bf63b2"}, + {file = "mypy-0.930-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:b419e9721260161e70d054a15abbd50603c16f159860cfd0daeab647d828fc29"}, + {file = "mypy-0.930-cp37-cp37m-win_amd64.whl", hash = "sha256:601f46593f627f8a9b944f74fd387c9b5f4266b39abad77471947069c2fc7651"}, + {file = "mypy-0.930-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:1ea7199780c1d7940b82dbc0a4e37722b4e3851264dbba81e01abecc9052d8a7"}, + {file = "mypy-0.930-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:70b197dd8c78fc5d2daf84bd093e8466a2b2e007eedaa85e792e513a820adbf7"}, + {file = "mypy-0.930-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:5feb56f8bb280468fe5fc8e6f56f48f99aa0df9eed3c507a11505ee4657b5380"}, + {file = "mypy-0.930-cp38-cp38-win_amd64.whl", hash = "sha256:2e9c5409e9cb81049bb03fa1009b573dea87976713e3898561567a86c4eaee01"}, + {file = "mypy-0.930-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:554873e45c1ca20f31ddf873deb67fa5d2e87b76b97db50669f0468ccded8fae"}, + {file = "mypy-0.930-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0feb82e9fa849affca7edd24713dbe809dce780ced9f3feca5ed3d80e40b777f"}, + {file = "mypy-0.930-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_12_x86_64.manylinux2010_x86_64.whl", hash = "sha256:bc1a0607ea03c30225347334af66b0af12eefba018a89a88c209e02b7065ea95"}, + {file = "mypy-0.930-cp39-cp39-win_amd64.whl", hash = "sha256:f9f665d69034b1fcfdbcd4197480d26298bbfb5d2dfe206245b6498addb34999"}, + {file = "mypy-0.930-py3-none-any.whl", hash = "sha256:bf4a44e03040206f7c058d1f5ba02ef2d1820720c88bc4285c7d9a4269f54173"}, + {file = "mypy-0.930.tar.gz", hash = "sha256:51426262ae4714cc7dd5439814676e0992b55bcc0f6514eccb4cf8e0678962c2"}, ] mypy-extensions = [ {file = 
"mypy_extensions-0.4.3-py2.py3-none-any.whl", hash = "sha256:090fedd75945a69ae91ce1303b5824f428daf5a028d2f6ab8a299250a846f15d"}, @@ -1529,6 +1526,10 @@ requests = [ {file = "ruamel.yaml-0.17.17.tar.gz", hash = "sha256:9751de4cbb57d4bfbf8fc394e125ed4a2f170fbff3dc3d78abf50be85924f8be"}, ] "ruamel.yaml.clib" = [ + {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:6e7be2c5bcb297f5b82fee9c665eb2eb7001d1050deaba8471842979293a80b0"}, + {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl", hash = "sha256:221eca6f35076c6ae472a531afa1c223b9c29377e62936f61bc8e6e8bdc5f9e7"}, + {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win32.whl", hash = "sha256:1070ba9dd7f9370d0513d649420c3b362ac2d687fe78c6e888f5b12bf8bc7bee"}, + {file = "ruamel.yaml.clib-0.2.6-cp310-cp310-win_amd64.whl", hash = "sha256:77df077d32921ad46f34816a9a16e6356d8100374579bc35e15bab5d4e9377de"}, {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-macosx_10_6_intel.whl", hash = "sha256:cfdb9389d888c5b74af297e51ce357b800dd844898af9d4a547ffc143fa56751"}, {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-manylinux1_x86_64.whl", hash = "sha256:7b2927e92feb51d830f531de4ccb11b320255ee95e791022555971c466af4527"}, {file = "ruamel.yaml.clib-0.2.6-cp35-cp35m-win32.whl", hash = "sha256:ada3f400d9923a190ea8b59c8f60680c4ef8a4b0dfae134d2f2ff68429adfab5"}, diff --git a/pyproject.toml b/pyproject.toml index feded5bf884..ff3b780a1e8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "aws_lambda_powertools" -version = "1.22.0" +version = "1.23.0" description = "A suite of utilities for AWS Lambda functions to ease adopting best practices such as tracing, structured logging, custom metrics, batching, idempotency, feature flags, and more." 
authors = ["Amazon Web Services"] include = ["aws_lambda_powertools/py.typed", "THIRD-PARTY-LICENSES"] @@ -53,7 +53,7 @@ flake8-bugbear = "^21.11.29" mkdocs-material = "^7.3.6" mkdocs-git-revision-date-plugin = "^0.3.1" mike = "^0.6.0" -mypy = "^0.910" +mypy = "^0.930" [tool.poetry.extras] @@ -85,6 +85,9 @@ exclude_lines = [ # Don't complain if non-runnable code isn't run: "if 0:", "if __name__ == .__main__.:", + + # Ignore type function overload + "@overload", ] [tool.isort] diff --git a/tests/events/apiGatewayProxyEventPrincipalId.json b/tests/events/apiGatewayProxyEventPrincipalId.json new file mode 100644 index 00000000000..f18a2a44bbd --- /dev/null +++ b/tests/events/apiGatewayProxyEventPrincipalId.json @@ -0,0 +1,13 @@ +{ + "resource": "/trip", + "path": "/trip", + "httpMethod": "POST", + "requestContext": { + "requestId": "34972478-2843-4ced-a657-253108738274", + "authorizer": { + "user_id": "fake_username", + "principalId": "fake", + "integrationLatency": 451 + } + } +} diff --git a/tests/functional/data_classes/test_amazon_mq.py b/tests/functional/data_classes/test_amazon_mq.py index 0f4f5079565..a88a962c17b 100644 --- a/tests/functional/data_classes/test_amazon_mq.py +++ b/tests/functional/data_classes/test_amazon_mq.py @@ -34,6 +34,7 @@ def test_active_mq_event(): messages = list(event.messages) message = messages[1] assert message.json_data["timeout"] == 0 + assert message.json_data["data"] == "CZrmf0Gw8Ov4bqLQxD4E" def test_rabbit_mq_event(): @@ -47,6 +48,7 @@ def test_rabbit_mq_event(): assert message.data is not None assert message.decoded_data is not None assert message.json_data["timeout"] == 0 + assert message.json_data["data"] == "CZrmf0Gw8Ov4bqLQxD4E" assert isinstance(message, RabbitMessage) properties = message.basic_properties diff --git a/tests/functional/event_handler/test_api_gateway.py b/tests/functional/event_handler/test_api_gateway.py index f28752e6de6..76ecbc7cdd7 100644 --- a/tests/functional/event_handler/test_api_gateway.py +++ b/tests/functional/event_handler/test_api_gateway.py @@ -163,7 +163,7 @@ def patch_func(): def handler(event, context): return app.resolve(event, context) - # Also check check the route configurations + # Also check the route configurations routes = app._routes assert len(routes) == 5 for route in routes: @@ -1076,3 +1076,92 @@ def foo(): assert result["statusCode"] == 200 assert result["headers"]["Content-Type"] == content_types.APPLICATION_JSON + + +def test_exception_handler(): + # GIVEN a resolver with an exception handler defined for ValueError + app = ApiGatewayResolver() + + @app.exception_handler(ValueError) + def handle_value_error(ex: ValueError): + print(f"request path is '{app.current_event.path}'") + return Response( + status_code=418, + content_type=content_types.TEXT_HTML, + body=str(ex), + ) + + @app.get("/my/path") + def get_lambda() -> Response: + raise ValueError("Foo!") + + # WHEN calling the event handler + # AND a ValueError is raised + result = app(LOAD_GW_EVENT, {}) + + # THEN call the exception_handler + assert result["statusCode"] == 418 + assert result["headers"]["Content-Type"] == content_types.TEXT_HTML + assert result["body"] == "Foo!" 
+ + +def test_exception_handler_service_error(): + # GIVEN + app = ApiGatewayResolver() + + @app.exception_handler(ServiceError) + def service_error(ex: ServiceError): + print(ex.msg) + return Response( + status_code=ex.status_code, + content_type=content_types.APPLICATION_JSON, + body="CUSTOM ERROR FORMAT", + ) + + @app.get("/my/path") + def get_lambda() -> Response: + raise InternalServerError("Something sensitive") + + # WHEN calling the event handler + # AND a ServiceError is raised + result = app(LOAD_GW_EVENT, {}) + + # THEN call the exception_handler + assert result["statusCode"] == 500 + assert result["headers"]["Content-Type"] == content_types.APPLICATION_JSON + assert result["body"] == "CUSTOM ERROR FORMAT" + + +def test_exception_handler_not_found(): + # GIVEN a resolver with an exception handler defined for a 404 not found + app = ApiGatewayResolver() + + @app.not_found + def handle_not_found(exc: NotFoundError) -> Response: + assert isinstance(exc, NotFoundError) + return Response(status_code=404, content_type=content_types.TEXT_PLAIN, body="I am a teapot!") + + # WHEN calling the event handler + # AND no route is found + result = app(LOAD_GW_EVENT, {}) + + # THEN call the exception_handler + assert result["statusCode"] == 404 + assert result["headers"]["Content-Type"] == content_types.TEXT_PLAIN + assert result["body"] == "I am a teapot!" + + +def test_exception_handler_not_found_alt(): + # GIVEN a resolver with `@app.not_found()` + app = ApiGatewayResolver() + + @app.not_found() + def handle_not_found(_) -> Response: + return Response(status_code=404, content_type=content_types.APPLICATION_JSON, body="{}") + + # WHEN calling the event handler + # AND no route is found + result = app(LOAD_GW_EVENT, {}) + + # THEN call the @app.not_found() function + assert result["statusCode"] == 404 diff --git a/tests/functional/idempotency/conftest.py b/tests/functional/idempotency/conftest.py index 0f74d503b88..017445ab348 100644 --- a/tests/functional/idempotency/conftest.py +++ b/tests/functional/idempotency/conftest.py @@ -1,5 +1,4 @@ import datetime -import hashlib import json from collections import namedtuple from decimal import Decimal @@ -11,20 +10,15 @@ from botocore.config import Config from jmespath import functions -from aws_lambda_powertools.shared.json_encoder import Encoder from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer from aws_lambda_powertools.utilities.idempotency.idempotency import IdempotencyConfig from aws_lambda_powertools.utilities.jmespath_utils import extract_data_from_envelope from aws_lambda_powertools.utilities.validation import envelopes -from tests.functional.utils import load_event +from tests.functional.utils import hash_idempotency_key, json_serialize, load_event TABLE_NAME = "TEST_TABLE" -def serialize(data): - return json.dumps(data, sort_keys=True, cls=Encoder) - - @pytest.fixture(scope="module") def config() -> Config: return Config(region_name="us-east-1") @@ -66,12 +60,12 @@ def lambda_response(): @pytest.fixture(scope="module") def serialized_lambda_response(lambda_response): - return serialize(lambda_response) + return json_serialize(lambda_response) @pytest.fixture(scope="module") def deserialized_lambda_response(lambda_response): - return json.loads(serialize(lambda_response)) + return json.loads(json_serialize(lambda_response)) @pytest.fixture @@ -150,7 +144,7 @@ def expected_params_put_item_with_validation(hashed_idempotency_key, hashed_vali def hashed_idempotency_key(lambda_apigw_event, default_jmespath, 
lambda_context): compiled_jmespath = jmespath.compile(default_jmespath) data = compiled_jmespath.search(lambda_apigw_event) - return "test-func.lambda_handler#" + hashlib.md5(serialize(data).encode()).hexdigest() + return "test-func.lambda_handler#" + hash_idempotency_key(data) @pytest.fixture @@ -158,12 +152,12 @@ def hashed_idempotency_key_with_envelope(lambda_apigw_event): event = extract_data_from_envelope( data=lambda_apigw_event, envelope=envelopes.API_GATEWAY_HTTP, jmespath_options={} ) - return "test-func.lambda_handler#" + hashlib.md5(serialize(event).encode()).hexdigest() + return "test-func.lambda_handler#" + hash_idempotency_key(event) @pytest.fixture def hashed_validation_key(lambda_apigw_event): - return hashlib.md5(serialize(lambda_apigw_event["requestContext"]).encode()).hexdigest() + return hash_idempotency_key(lambda_apigw_event["requestContext"]) @pytest.fixture @@ -171,6 +165,11 @@ def persistence_store(config): return DynamoDBPersistenceLayer(table_name=TABLE_NAME, boto_config=config) +@pytest.fixture +def persistence_store_compound(config): + return DynamoDBPersistenceLayer(table_name=TABLE_NAME, boto_config=config, key_attr="id", sort_key_attr="sk") + + @pytest.fixture def idempotency_config(config, request, default_jmespath): return IdempotencyConfig( diff --git a/tests/functional/idempotency/test_idempotency.py b/tests/functional/idempotency/test_idempotency.py index a8cf652d8a0..0732f1d58b1 100644 --- a/tests/functional/idempotency/test_idempotency.py +++ b/tests/functional/idempotency/test_idempotency.py @@ -1,6 +1,4 @@ import copy -import hashlib -import json import sys from hashlib import md5 from unittest.mock import MagicMock @@ -8,9 +6,11 @@ import jmespath import pytest from botocore import stub +from pydantic import BaseModel from aws_lambda_powertools.utilities.data_classes import APIGatewayProxyEventV2, event_source from aws_lambda_powertools.utilities.idempotency import DynamoDBPersistenceLayer, IdempotencyConfig +from aws_lambda_powertools.utilities.idempotency.base import _prepare_data from aws_lambda_powertools.utilities.idempotency.exceptions import ( IdempotencyAlreadyInProgressError, IdempotencyInconsistentStateError, @@ -22,12 +22,18 @@ from aws_lambda_powertools.utilities.idempotency.idempotency import idempotent, idempotent_function from aws_lambda_powertools.utilities.idempotency.persistence.base import BasePersistenceLayer, DataRecord from aws_lambda_powertools.utilities.validation import envelopes, validator -from tests.functional.idempotency.conftest import serialize -from tests.functional.utils import load_event +from tests.functional.utils import hash_idempotency_key, json_serialize, load_event TABLE_NAME = "TEST_TABLE" +def get_dataclasses_lib(): + """Python 3.6 doesn't support dataclasses natively""" + import dataclasses + + return dataclasses + + # Using parametrize to run test twice, with two separate instances of persistence store. One instance with caching # enabled, and one without. 
@pytest.mark.parametrize("idempotency_config", [{"use_local_cache": False}, {"use_local_cache": True}], indirect=True) @@ -744,7 +750,7 @@ def test_default_no_raise_on_missing_idempotency_key( hashed_key = persistence_store._get_hashed_idempotency_key({}) # THEN return the hash of None - expected_value = f"test-func.{function_name}#" + md5(serialize(None).encode()).hexdigest() + expected_value = f"test-func.{function_name}#" + md5(json_serialize(None).encode()).hexdigest() assert expected_value == hashed_key @@ -788,7 +794,7 @@ def test_jmespath_with_powertools_json( expected_value = [sub_attr_value, static_pk_value] api_gateway_proxy_event = { "requestContext": {"authorizer": {"claims": {"sub": sub_attr_value}}}, - "body": serialize({"id": static_pk_value}), + "body": json_serialize({"id": static_pk_value}), } # WHEN calling _get_hashed_idempotency_key @@ -872,9 +878,7 @@ def _delete_record(self, data_record: DataRecord) -> None: def test_idempotent_lambda_event_source(lambda_context): # Scenario to validate that we can use the event_source decorator before or after the idempotent decorator mock_event = load_event("apiGatewayProxyV2Event.json") - persistence_layer = MockPersistenceLayer( - "test-func.lambda_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer("test-func.lambda_handler#" + hash_idempotency_key(mock_event)) expected_result = {"message": "Foo"} # GIVEN an event_source decorator @@ -894,9 +898,8 @@ def lambda_handler(event, _): def test_idempotent_function(): # Scenario to validate we can use idempotent_function with any function mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") @@ -913,9 +916,8 @@ def test_idempotent_function_arbitrary_args_kwargs(): # Scenario to validate we can use idempotent_function with a function # with an arbitrary number of args and kwargs mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") @@ -930,9 +932,8 @@ def record_handler(arg_one, arg_two, record, is_record): def test_idempotent_function_invalid_data_kwarg(): mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} keyword_argument = "payload" @@ -949,9 +950,8 @@ def record_handler(record): def test_idempotent_function_arg_instead_of_kwarg(): mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + 
idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} keyword_argument = "record" @@ -969,18 +969,15 @@ def record_handler(record): def test_idempotent_function_and_lambda_handler(lambda_context): # Scenario to validate we can use both idempotent_function and idempotent decorators mock_event = {"data": "value"} - persistence_layer = MockPersistenceLayer( - "test-func.record_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + idempotency_key = "test-func.record_handler#" + hash_idempotency_key(mock_event) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) expected_result = {"message": "Foo"} @idempotent_function(persistence_store=persistence_layer, data_keyword_argument="record") def record_handler(record): return expected_result - persistence_layer = MockPersistenceLayer( - "test-func.lambda_handler#" + hashlib.md5(serialize(mock_event).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer("test-func.lambda_handler#" + hash_idempotency_key(mock_event)) @idempotent(persistence_store=persistence_layer) def lambda_handler(event, _): @@ -1001,18 +998,16 @@ def test_idempotent_data_sorting(): # Scenario to validate same data in different order hashes to the same idempotency key data_one = {"data": "test message 1", "more_data": "more data 1"} data_two = {"more_data": "more data 1", "data": "test message 1"} - + idempotency_key = "test-func.dummy#" + hash_idempotency_key(data_one) # Assertion will happen in MockPersistenceLayer - persistence_layer = MockPersistenceLayer( - "test-func.dummy#" + hashlib.md5(json.dumps(data_one).encode()).hexdigest() - ) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) # GIVEN @idempotent_function(data_keyword_argument="payload", persistence_store=persistence_layer) def dummy(payload): return {"message": "hello"} - # WHEN + # WHEN/THEN assertion will happen at MockPersistenceLayer dummy(payload=data_two) @@ -1057,3 +1052,145 @@ def two(data): assert one(data=mock_event) == "one" assert two(data=mock_event) == "two" assert len(persistence_store.table.method_calls) == 4 + + +def test_invalid_dynamodb_persistence_layer(): + # Scenario constructing a DynamoDBPersistenceLayer with a key_attr matching sort_key_attr should fail + with pytest.raises(ValueError) as ve: + DynamoDBPersistenceLayer( + table_name="Foo", + key_attr="id", + sort_key_attr="id", + ) + # and raise a ValueError + assert str(ve.value) == "key_attr [id] and sort_key_attr [id] cannot be the same!" 
+ + +@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") +def test_idempotent_function_dataclasses(): + # Scenario _prepare_data should convert a python dataclasses to a dict + dataclasses = get_dataclasses_lib() + + @dataclasses.dataclass + class Foo: + name: str + + expected_result = {"name": "Bar"} + data = Foo(name="Bar") + as_dict = _prepare_data(data) + assert as_dict == dataclasses.asdict(data) + assert as_dict == expected_result + + +def test_idempotent_function_pydantic(): + # Scenario _prepare_data should convert a pydantic to a dict + class Foo(BaseModel): + name: str + + expected_result = {"name": "Bar"} + data = Foo(name="Bar") + as_dict = _prepare_data(data) + assert as_dict == data.dict() + assert as_dict == expected_result + + +@pytest.mark.parametrize("data", [None, "foo", ["foo"], 1, True, {}]) +def test_idempotent_function_other(data): + # All other data types should be left as is + assert _prepare_data(data) == data + + +@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") +def test_idempotent_function_dataclass_with_jmespath(): + # GIVEN + dataclasses = get_dataclasses_lib() + config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) + mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} + idempotency_key = "test-func.collect_payment#" + hash_idempotency_key(mock_event["transaction_id"]) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) + + @dataclasses.dataclass + class Payment: + customer_id: str + transaction_id: str + + @idempotent_function(data_keyword_argument="payment", persistence_store=persistence_layer, config=config) + def collect_payment(payment: Payment): + return payment.transaction_id + + # WHEN + payment = Payment(**mock_event) + result = collect_payment(payment=payment) + + # THEN idempotency key assertion happens at MockPersistenceLayer + assert result == payment.transaction_id + + +@pytest.mark.skipif(sys.version_info < (3, 7), reason="requires python3.7 or higher for dataclasses") +def test_idempotent_function_pydantic_with_jmespath(): + # GIVEN + config = IdempotencyConfig(event_key_jmespath="transaction_id", use_local_cache=True) + mock_event = {"customer_id": "fake", "transaction_id": "fake-id"} + idempotency_key = "test-func.collect_payment#" + hash_idempotency_key(mock_event["transaction_id"]) + persistence_layer = MockPersistenceLayer(expected_idempotency_key=idempotency_key) + + class Payment(BaseModel): + customer_id: str + transaction_id: str + + @idempotent_function(data_keyword_argument="payment", persistence_store=persistence_layer, config=config) + def collect_payment(payment: Payment): + return payment.transaction_id + + # WHEN + payment = Payment(**mock_event) + result = collect_payment(payment=payment) + + # THEN idempotency key assertion happens at MockPersistenceLayer + assert result == payment.transaction_id + + +@pytest.mark.parametrize("idempotency_config", [{"use_local_cache": False}], indirect=True) +def test_idempotent_lambda_compound_already_completed( + idempotency_config: IdempotencyConfig, + persistence_store_compound: DynamoDBPersistenceLayer, + lambda_apigw_event, + timestamp_future, + hashed_idempotency_key, + serialized_lambda_response, + deserialized_lambda_response, + lambda_context, +): + """ + Test idempotent decorator having a DynamoDBPersistenceLayer with a compound key + """ + + stubber = 
stub.Stubber(persistence_store_compound.table.meta.client) + stubber.add_client_error("put_item", "ConditionalCheckFailedException") + ddb_response = { + "Item": { + "id": {"S": "idempotency#"}, + "sk": {"S": hashed_idempotency_key}, + "expiration": {"N": timestamp_future}, + "data": {"S": serialized_lambda_response}, + "status": {"S": "COMPLETED"}, + } + } + expected_params = { + "TableName": TABLE_NAME, + "Key": {"id": "idempotency#", "sk": hashed_idempotency_key}, + "ConsistentRead": True, + } + stubber.add_response("get_item", ddb_response, expected_params) + + stubber.activate() + + @idempotent(config=idempotency_config, persistence_store=persistence_store_compound) + def lambda_handler(event, context): + raise ValueError + + lambda_resp = lambda_handler(lambda_apigw_event, lambda_context) + assert lambda_resp == deserialized_lambda_response + + stubber.assert_no_pending_responses() + stubber.deactivate() diff --git a/tests/functional/parser/test_kinesis.py b/tests/functional/parser/test_kinesis.py index 632a7463805..552cb6cef68 100644 --- a/tests/functional/parser/test_kinesis.py +++ b/tests/functional/parser/test_kinesis.py @@ -35,7 +35,7 @@ def handle_kinesis_no_envelope(event: KinesisDataStreamModel, _: LambdaContext): assert kinesis.approximateArrivalTimestamp == 1545084650.987 assert kinesis.kinesisSchemaVersion == "1.0" assert kinesis.partitionKey == "1" - assert kinesis.sequenceNumber == 49590338271490256608559692538361571095921575989136588898 + assert kinesis.sequenceNumber == "49590338271490256608559692538361571095921575989136588898" assert kinesis.data == b"Hello, this is a test." diff --git a/tests/functional/test_data_classes.py b/tests/functional/test_data_classes.py index ded32639233..d0d936ec2f8 100644 --- a/tests/functional/test_data_classes.py +++ b/tests/functional/test_data_classes.py @@ -272,6 +272,8 @@ def test_cognito_pre_token_generation_trigger_event(): claims_override_details.set_group_configuration_groups_to_override(expected_groups) assert claims_override_details.group_configuration.groups_to_override == expected_groups assert event["response"]["claimsOverrideDetails"]["groupOverrideDetails"]["groupsToOverride"] == expected_groups + claims_override_details = event.response.claims_override_details + assert claims_override_details["groupOverrideDetails"]["groupsToOverride"] == expected_groups claims_override_details.set_group_configuration_iam_roles_to_override(["role"]) assert claims_override_details.group_configuration.iam_roles_to_override == ["role"] @@ -542,6 +544,7 @@ def test_dynamo_attribute_value_null_value(): attribute_value = AttributeValue(example_attribute_value) assert attribute_value.get_type == AttributeValueType.Null + assert attribute_value.null_value is None assert attribute_value.null_value == attribute_value.get_value @@ -897,6 +900,20 @@ def test_api_gateway_proxy_event(): assert request_context.identity.client_cert.subject_dn == "www.example.com" +def test_api_gateway_proxy_event_with_principal_id(): + event = APIGatewayProxyEvent(load_event("apiGatewayProxyEventPrincipalId.json")) + + request_context = event.request_context + authorizer = request_context.authorizer + assert authorizer.claims is None + assert authorizer.scopes is None + assert authorizer["principalId"] == "fake" + assert authorizer.get("principalId") == "fake" + assert authorizer.principal_id == "fake" + assert authorizer.integration_latency == 451 + assert authorizer.get("integrationStatus", "failed") == "failed" + + def test_api_gateway_proxy_v2_event(): event = 
APIGatewayProxyEventV2(load_event("apiGatewayProxyV2Event.json")) @@ -1040,6 +1057,7 @@ def test_base_proxy_event_json_body(): data = {"message": "Foo"} event = BaseProxyEvent({"body": json.dumps(data)}) assert event.json_body == data + assert event.json_body["message"] == "Foo" def test_base_proxy_event_decode_body_key_error(): @@ -1070,7 +1088,7 @@ def test_base_proxy_event_json_body_with_base64_encoded_data(): event = BaseProxyEvent({"body": encoded_data, "isBase64Encoded": True}) # WHEN calling json_body - # THEN then base64 decode and json load + # THEN base64 decode and json load assert event.json_body == data @@ -1106,7 +1124,8 @@ def test_kinesis_stream_event_json_data(): json_value = {"test": "value"} data = base64.b64encode(bytes(json.dumps(json_value), "utf-8")).decode("utf-8") event = KinesisStreamEvent({"Records": [{"kinesis": {"data": data}}]}) - assert next(event.records).kinesis.data_as_json() == json_value + record = next(event.records) + assert record.kinesis.data_as_json() == json_value def test_alb_event(): @@ -1378,9 +1397,11 @@ def test_code_pipeline_event_decoded_data(): event = CodePipelineJobEvent(load_event("codePipelineEventData.json")) assert event.data.continuation_token is None - decoded_params = event.data.action_configuration.configuration.decoded_user_parameters + configuration = event.data.action_configuration.configuration + decoded_params = configuration.decoded_user_parameters assert decoded_params == event.decoded_user_parameters - assert "VALUE" == decoded_params["KEY"] + assert decoded_params["KEY"] == "VALUE" + assert configuration.decoded_user_parameters["KEY"] == "VALUE" assert "my-pipeline-SourceArtifact" == event.data.input_artifacts[0].name diff --git a/tests/functional/test_logger.py b/tests/functional/test_logger.py index 3c9a8a54189..3fb43474081 100644 --- a/tests/functional/test_logger.py +++ b/tests/functional/test_logger.py @@ -597,3 +597,16 @@ def handler(event, context): first_log, second_log = capture_multiple_logging_statements_output(stdout) assert "my_key" in first_log assert "my_key" not in second_log + + +def test_inject_lambda_context_allows_handler_with_kwargs(lambda_context, stdout, service_name): + # GIVEN + logger = Logger(service=service_name, stream=stdout) + + # WHEN + @logger.inject_lambda_context(clear_state=True) + def handler(event, context, my_custom_option=None): + pass + + # THEN + handler({}, lambda_context, my_custom_option="blah") diff --git a/tests/functional/test_shared_functions.py b/tests/functional/test_shared_functions.py index cc4fd77fbe5..c71b7239739 100644 --- a/tests/functional/test_shared_functions.py +++ b/tests/functional/test_shared_functions.py @@ -1,4 +1,6 @@ -from aws_lambda_powertools.shared.functions import resolve_env_var_choice, resolve_truthy_env_var_choice +import pytest + +from aws_lambda_powertools.shared.functions import resolve_env_var_choice, resolve_truthy_env_var_choice, strtobool def test_resolve_env_var_choice_explicit_wins_over_env_var(): @@ -9,3 +11,19 @@ def test_resolve_env_var_choice_explicit_wins_over_env_var(): def test_resolve_env_var_choice_env_wins_over_absent_explicit(): assert resolve_truthy_env_var_choice(env="true") == 1 assert resolve_env_var_choice(env="something") == "something" + + +@pytest.mark.parametrize("true_value", ["y", "yes", "t", "true", "on", "1"]) +def test_strtobool_true(true_value): + assert strtobool(true_value) + + +@pytest.mark.parametrize("false_value", ["n", "no", "f", "false", "off", "0"]) +def test_strtobool_false(false_value): + assert 
strtobool(false_value) is False + + +def test_strtobool_value_error(): + with pytest.raises(ValueError) as exp: + strtobool("fail") + assert str(exp.value) == "invalid truth value 'fail'" diff --git a/tests/functional/test_utilities_batch.py b/tests/functional/test_utilities_batch.py index a453f0bfe07..3728af3111d 100644 --- a/tests/functional/test_utilities_batch.py +++ b/tests/functional/test_utilities_batch.py @@ -1,12 +1,29 @@ -from typing import Callable +import json +from random import randint +from typing import Callable, Dict, Optional from unittest.mock import patch import pytest from botocore.config import Config from botocore.stub import Stubber -from aws_lambda_powertools.utilities.batch import PartialSQSProcessor, batch_processor, sqs_batch_processor -from aws_lambda_powertools.utilities.batch.exceptions import SQSBatchProcessingError +from aws_lambda_powertools.utilities.batch import ( + BatchProcessor, + EventType, + PartialSQSProcessor, + batch_processor, + sqs_batch_processor, +) +from aws_lambda_powertools.utilities.batch.exceptions import BatchProcessingError, SQSBatchProcessingError +from aws_lambda_powertools.utilities.data_classes.dynamo_db_stream_event import DynamoDBRecord +from aws_lambda_powertools.utilities.data_classes.kinesis_stream_event import KinesisStreamRecord +from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord +from aws_lambda_powertools.utilities.parser import BaseModel, validator +from aws_lambda_powertools.utilities.parser.models import DynamoDBStreamChangedRecordModel, DynamoDBStreamRecordModel +from aws_lambda_powertools.utilities.parser.models import KinesisDataStreamRecord as KinesisDataStreamRecordModel +from aws_lambda_powertools.utilities.parser.models import KinesisDataStreamRecordPayload, SqsRecordModel +from aws_lambda_powertools.utilities.parser.types import Literal +from tests.functional.utils import b64_to_str, str_to_b64 @pytest.fixture(scope="module") @@ -16,7 +33,12 @@ def factory(body: str): "messageId": "059f36b4-87a3-44ab-83d2-661975830a7d", "receiptHandle": "AQEBwJnKyrHigUMZj6rYigCgxlaS3SLy0a", "body": body, - "attributes": {}, + "attributes": { + "ApproximateReceiveCount": "1", + "SentTimestamp": "1545082649183", + "SenderId": "AIDAIENQZJOLO23YVJ4VO", + "ApproximateFirstReceiveTimestamp": "1545082649185", + }, "messageAttributes": {}, "md5OfBody": "e4e68fb7bd0e697a0ae8f1bb342846b3", "eventSource": "aws:sqs", @@ -27,6 +49,53 @@ def factory(body: str): return factory +@pytest.fixture(scope="module") +def kinesis_event_factory() -> Callable: + def factory(body: str): + seq = "".join(str(randint(0, 9)) for _ in range(52)) + return { + "kinesis": { + "kinesisSchemaVersion": "1.0", + "partitionKey": "1", + "sequenceNumber": seq, + "data": str_to_b64(body), + "approximateArrivalTimestamp": 1545084650.987, + }, + "eventSource": "aws:kinesis", + "eventVersion": "1.0", + "eventID": f"shardId-000000000006:{seq}", + "eventName": "aws:kinesis:record", + "invokeIdentityArn": "arn:aws:iam::123456789012:role/lambda-role", + "awsRegion": "us-east-2", + "eventSourceARN": "arn:aws:kinesis:us-east-2:123456789012:stream/lambda-stream", + } + + return factory + + +@pytest.fixture(scope="module") +def dynamodb_event_factory() -> Callable: + def factory(body: str): + seq = "".join(str(randint(0, 9)) for _ in range(10)) + return { + "eventID": "1", + "eventVersion": "1.0", + "dynamodb": { + "Keys": {"Id": {"N": "101"}}, + "NewImage": {"Message": {"S": body}}, + "StreamViewType": "NEW_AND_OLD_IMAGES", + "SequenceNumber": seq, + 
"SizeBytes": 26, + }, + "awsRegion": "us-west-2", + "eventName": "INSERT", + "eventSourceARN": "eventsource_arn", + "eventSource": "aws:dynamodb", + } + + return factory + + @pytest.fixture(scope="module") def record_handler() -> Callable: def handler(record): @@ -38,6 +107,28 @@ def handler(record): return handler +@pytest.fixture(scope="module") +def kinesis_record_handler() -> Callable: + def handler(record: KinesisStreamRecord): + body = b64_to_str(record.kinesis.data) + if "fail" in body: + raise Exception("Failed to process record.") + return body + + return handler + + +@pytest.fixture(scope="module") +def dynamodb_record_handler() -> Callable: + def handler(record: DynamoDBRecord): + body = record.dynamodb.new_image.get("Message").get_value + if "fail" in body: + raise Exception("Failed to process record.") + return body + + return handler + + @pytest.fixture(scope="module") def config() -> Config: return Config(region_name="us-east-1") @@ -67,6 +158,14 @@ def stubbed_partial_processor_suppressed(config) -> PartialSQSProcessor: yield stubber, processor +@pytest.fixture(scope="module") +def order_event_factory() -> Callable: + def factory(item: Dict) -> str: + return json.dumps({"item": item}) + + return factory + + def test_partial_sqs_processor_context_with_failure(sqs_event_factory, record_handler, partial_processor): """ Test processor with one failing record @@ -290,3 +389,450 @@ def test_partial_sqs_processor_context_only_failure(sqs_event_factory, record_ha ctx.process() assert len(error.value.child_exceptions) == 2 + + +def test_batch_processor_middleware_success_only(sqs_event_factory, record_handler): + # GIVEN + first_record = SQSRecord(sqs_event_factory("success")) + second_record = SQSRecord(sqs_event_factory("success")) + event = {"Records": [first_record.raw_event, second_record.raw_event]} + + processor = BatchProcessor(event_type=EventType.SQS) + + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + + # WHEN + result = lambda_handler(event, {}) + + # THEN + assert result["batchItemFailures"] == [] + + +def test_batch_processor_middleware_with_failure(sqs_event_factory, record_handler): + # GIVEN + first_record = SQSRecord(sqs_event_factory("fail")) + second_record = SQSRecord(sqs_event_factory("success")) + event = {"Records": [first_record.raw_event, second_record.raw_event]} + + processor = BatchProcessor(event_type=EventType.SQS) + + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + + # WHEN + result = lambda_handler(event, {}) + + # THEN + assert len(result["batchItemFailures"]) == 1 + + +def test_batch_processor_context_success_only(sqs_event_factory, record_handler): + # GIVEN + first_record = SQSRecord(sqs_event_factory("success")) + second_record = SQSRecord(sqs_event_factory("success")) + records = [first_record.raw_event, second_record.raw_event] + processor = BatchProcessor(event_type=EventType.SQS) + + # WHEN + with processor(records, record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages == [ + ("success", first_record.body, first_record.raw_event), + ("success", second_record.body, second_record.raw_event), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_context_with_failure(sqs_event_factory, record_handler): + # GIVEN + first_record = SQSRecord(sqs_event_factory("failure")) + 
second_record = SQSRecord(sqs_event_factory("success")) + records = [first_record.raw_event, second_record.raw_event] + processor = BatchProcessor(event_type=EventType.SQS) + + # WHEN + with processor(records, record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages[1] == ("success", second_record.body, second_record.raw_event) + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record.message_id}]} + + +def test_batch_processor_kinesis_context_success_only(kinesis_event_factory, kinesis_record_handler): + # GIVEN + first_record = KinesisStreamRecord(kinesis_event_factory("success")) + second_record = KinesisStreamRecord(kinesis_event_factory("success")) + + records = [first_record.raw_event, second_record.raw_event] + processor = BatchProcessor(event_type=EventType.KinesisDataStreams) + + # WHEN + with processor(records, kinesis_record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages == [ + ("success", b64_to_str(first_record.kinesis.data), first_record.raw_event), + ("success", b64_to_str(second_record.kinesis.data), second_record.raw_event), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_kinesis_context_with_failure(kinesis_event_factory, kinesis_record_handler): + # GIVEN + first_record = KinesisStreamRecord(kinesis_event_factory("failure")) + second_record = KinesisStreamRecord(kinesis_event_factory("success")) + + records = [first_record.raw_event, second_record.raw_event] + processor = BatchProcessor(event_type=EventType.KinesisDataStreams) + + # WHEN + with processor(records, kinesis_record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages[1] == ("success", b64_to_str(second_record.kinesis.data), second_record.raw_event) + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record.kinesis.sequence_number}]} + + +def test_batch_processor_kinesis_middleware_with_failure(kinesis_event_factory, kinesis_record_handler): + # GIVEN + first_record = KinesisStreamRecord(kinesis_event_factory("failure")) + second_record = KinesisStreamRecord(kinesis_event_factory("success")) + event = {"Records": [first_record.raw_event, second_record.raw_event]} + + processor = BatchProcessor(event_type=EventType.KinesisDataStreams) + + @batch_processor(record_handler=kinesis_record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + + # WHEN + result = lambda_handler(event, {}) + + # THEN + assert len(result["batchItemFailures"]) == 1 + + +def test_batch_processor_dynamodb_context_success_only(dynamodb_event_factory, dynamodb_record_handler): + # GIVEN + first_record = dynamodb_event_factory("success") + second_record = dynamodb_event_factory("success") + records = [first_record, second_record] + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + + # WHEN + with processor(records, dynamodb_record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages == [ + ("success", first_record["dynamodb"]["NewImage"]["Message"]["S"], first_record), + ("success", second_record["dynamodb"]["NewImage"]["Message"]["S"], second_record), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_dynamodb_context_with_failure(dynamodb_event_factory, dynamodb_record_handler): + # 
GIVEN + first_record = dynamodb_event_factory("failure") + second_record = dynamodb_event_factory("success") + records = [first_record, second_record] + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + + # WHEN + with processor(records, dynamodb_record_handler) as batch: + processed_messages = batch.process() + + # THEN + assert processed_messages[1] == ("success", second_record["dynamodb"]["NewImage"]["Message"]["S"], second_record) + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]}]} + + +def test_batch_processor_dynamodb_middleware_with_failure(dynamodb_event_factory, dynamodb_record_handler): + # GIVEN + first_record = dynamodb_event_factory("failure") + second_record = dynamodb_event_factory("success") + event = {"Records": [first_record, second_record]} + + processor = BatchProcessor(event_type=EventType.DynamoDBStreams) + + @batch_processor(record_handler=dynamodb_record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + + # WHEN + result = lambda_handler(event, {}) + + # THEN + assert len(result["batchItemFailures"]) == 1 + + +def test_batch_processor_context_model(sqs_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class OrderSqs(SqsRecordModel): + body: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("body", pre=True) + def transform_body_to_dict(cls, value: str): + return json.loads(value) + + def record_handler(record: OrderSqs): + return record.body.item + + order_event = order_event_factory({"type": "success"}) + first_record = sqs_event_factory(order_event) + second_record = sqs_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.SQS, model=OrderSqs) + with processor(records, record_handler) as batch: + processed_messages = batch.process() + + # THEN + order_item = json.loads(order_event)["item"] + assert processed_messages == [ + ("success", order_item, first_record), + ("success", order_item, second_record), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_context_model_with_failure(sqs_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class OrderSqs(SqsRecordModel): + body: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("body", pre=True) + def transform_body_to_dict(cls, value: str): + return json.loads(value) + + def record_handler(record: OrderSqs): + if "fail" in record.body.item["type"]: + raise Exception("Failed to process record.") + return record.body.item + + order_event = order_event_factory({"type": "success"}) + order_event_fail = order_event_factory({"type": "fail"}) + first_record = sqs_event_factory(order_event_fail) + second_record = sqs_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.SQS, model=OrderSqs) + with processor(records, record_handler) as batch: + batch.process() + + # THEN + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["messageId"]}]} + + +def test_batch_processor_dynamodb_context_model(dynamodb_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class 
OrderDynamoDB(BaseModel): + Message: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("Message", pre=True) + def transform_message_to_dict(cls, value: Dict[Literal["S"], str]): + return json.loads(value["S"]) + + class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel): + NewImage: Optional[OrderDynamoDB] + OldImage: Optional[OrderDynamoDB] + + class OrderDynamoDBRecord(DynamoDBStreamRecordModel): + dynamodb: OrderDynamoDBChangeRecord + + def record_handler(record: OrderDynamoDBRecord): + return record.dynamodb.NewImage.Message.item + + order_event = order_event_factory({"type": "success"}) + first_record = dynamodb_event_factory(order_event) + second_record = dynamodb_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.DynamoDBStreams, model=OrderDynamoDBRecord) + with processor(records, record_handler) as batch: + processed_messages = batch.process() + + # THEN + order_item = json.loads(order_event)["item"] + assert processed_messages == [ + ("success", order_item, first_record), + ("success", order_item, second_record), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_dynamodb_context_model_with_failure(dynamodb_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class OrderDynamoDB(BaseModel): + Message: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("Message", pre=True) + def transform_message_to_dict(cls, value: Dict[Literal["S"], str]): + return json.loads(value["S"]) + + class OrderDynamoDBChangeRecord(DynamoDBStreamChangedRecordModel): + NewImage: Optional[OrderDynamoDB] + OldImage: Optional[OrderDynamoDB] + + class OrderDynamoDBRecord(DynamoDBStreamRecordModel): + dynamodb: OrderDynamoDBChangeRecord + + def record_handler(record: OrderDynamoDBRecord): + if "fail" in record.dynamodb.NewImage.Message.item["type"]: + raise Exception("Failed to process record.") + return record.dynamodb.NewImage.Message.item + + order_event = order_event_factory({"type": "success"}) + order_event_fail = order_event_factory({"type": "fail"}) + first_record = dynamodb_event_factory(order_event_fail) + second_record = dynamodb_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.DynamoDBStreams, model=OrderDynamoDBRecord) + with processor(records, record_handler) as batch: + batch.process() + + # THEN + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["dynamodb"]["SequenceNumber"]}]} + + +def test_batch_processor_kinesis_context_parser_model(kinesis_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class OrderKinesisPayloadRecord(KinesisDataStreamRecordPayload): + data: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("data", pre=True) + def transform_message_to_dict(cls, value: str): + # Powertools KinesisDataStreamRecordModel already decodes b64 to str here + return json.loads(value) + + class OrderKinesisRecord(KinesisDataStreamRecordModel): + kinesis: OrderKinesisPayloadRecord + + def record_handler(record: OrderKinesisRecord): + return record.kinesis.data.item + + order_event = order_event_factory({"type": "success"}) + first_record = kinesis_event_factory(order_event) + 
second_record = kinesis_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.KinesisDataStreams, model=OrderKinesisRecord) + with processor(records, record_handler) as batch: + processed_messages = batch.process() + + # THEN + order_item = json.loads(order_event)["item"] + assert processed_messages == [ + ("success", order_item, first_record), + ("success", order_item, second_record), + ] + + assert batch.response() == {"batchItemFailures": []} + + +def test_batch_processor_kinesis_context_parser_model_with_failure(kinesis_event_factory, order_event_factory): + # GIVEN + class Order(BaseModel): + item: dict + + class OrderKinesisPayloadRecord(KinesisDataStreamRecordPayload): + data: Order + + # auto transform json string + # so Pydantic can auto-initialize nested Order model + @validator("data", pre=True) + def transform_message_to_dict(cls, value: str): + # Powertools KinesisDataStreamRecordModel + return json.loads(value) + + class OrderKinesisRecord(KinesisDataStreamRecordModel): + kinesis: OrderKinesisPayloadRecord + + def record_handler(record: OrderKinesisRecord): + if "fail" in record.kinesis.data.item["type"]: + raise Exception("Failed to process record.") + return record.kinesis.data.item + + order_event = order_event_factory({"type": "success"}) + order_event_fail = order_event_factory({"type": "fail"}) + + first_record = kinesis_event_factory(order_event_fail) + second_record = kinesis_event_factory(order_event) + records = [first_record, second_record] + + # WHEN + processor = BatchProcessor(event_type=EventType.KinesisDataStreams, model=OrderKinesisRecord) + with processor(records, record_handler) as batch: + batch.process() + + # THEN + assert len(batch.fail_messages) == 1 + assert batch.response() == {"batchItemFailures": [{"itemIdentifier": first_record["kinesis"]["sequenceNumber"]}]} + + +def test_batch_processor_error_when_entire_batch_fails(sqs_event_factory, record_handler): + # GIVEN + first_record = SQSRecord(sqs_event_factory("fail")) + second_record = SQSRecord(sqs_event_factory("fail")) + event = {"Records": [first_record.raw_event, second_record.raw_event]} + + processor = BatchProcessor(event_type=EventType.SQS) + + @batch_processor(record_handler=record_handler, processor=processor) + def lambda_handler(event, context): + return processor.response() + + # WHEN/THEN + with pytest.raises(BatchProcessingError) as e: + lambda_handler(event, {}) + ret = str(e) + assert ret is not None diff --git a/tests/functional/utils.py b/tests/functional/utils.py index a58d27f3526..5f1f21afc51 100644 --- a/tests/functional/utils.py +++ b/tests/functional/utils.py @@ -1,8 +1,29 @@ +import base64 +import hashlib import json from pathlib import Path from typing import Any +from aws_lambda_powertools.shared.json_encoder import Encoder + def load_event(file_name: str) -> Any: path = Path(str(Path(__file__).parent.parent) + "/events/" + file_name) return json.loads(path.read_text()) + + +def str_to_b64(data: str) -> str: + return base64.b64encode(data.encode()).decode("utf-8") + + +def b64_to_str(data: str) -> str: + return base64.b64decode(data.encode()).decode("utf-8") + + +def json_serialize(data): + return json.dumps(data, sort_keys=True, cls=Encoder) + + +def hash_idempotency_key(data: Any): + """Serialize data to JSON, encode, and hash it for idempotency key""" + return hashlib.md5(json_serialize(data).encode()).hexdigest() diff --git a/tests/unit/test_tracing.py b/tests/unit/test_tracing.py index 
55273b072c6..2482b0177d3 100644 --- a/tests/unit/test_tracing.py +++ b/tests/unit/test_tracing.py @@ -2,6 +2,7 @@ import sys from typing import NamedTuple from unittest import mock +from unittest.mock import MagicMock import pytest @@ -628,3 +629,24 @@ def handler(event, context): # THEN assert in_subsegment_mock.put_annotation.call_count == 1 assert in_subsegment_mock.put_annotation.call_args == mocker.call(key="ColdStart", value=True) + + +@mock.patch("aws_xray_sdk.ext.httplib.add_ignored") +def test_ignore_endpoints_xray_sdk(mock_add_ignored: MagicMock): + # GIVEN a xray sdk provider + tracer = Tracer() + # WHEN we call ignore_endpoint + tracer.ignore_endpoint(hostname="https://www.foo.com/", urls=["/bar", "/ignored"]) + # THEN call xray add_ignored + assert mock_add_ignored.call_count == 1 + mock_add_ignored.assert_called_with(hostname="https://www.foo.com/", urls=["/bar", "/ignored"]) + + +@mock.patch("aws_xray_sdk.ext.httplib.add_ignored") +def test_ignore_endpoints_mocked_provider(mock_add_ignored: MagicMock, provider_stub, in_subsegment_mock): + # GIVEN a mock provider + tracer = Tracer(provider=provider_stub(in_subsegment=in_subsegment_mock.in_subsegment)) + # WHEN we call ignore_endpoint + tracer.ignore_endpoint(hostname="https://foo.com/") + # THEN don't call xray add_ignored + assert mock_add_ignored.call_count == 0 diff --git a/tests/utils.py b/tests/utils.py new file mode 100644 index 00000000000..e69de29bb2d
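For reference, the new `BatchProcessor` exercised throughout the batch tests above is typically wired up as follows; a sketch derived from the test code, with the record handler body being illustrative:

```python
from aws_lambda_powertools.utilities.batch import BatchProcessor, EventType, batch_processor
from aws_lambda_powertools.utilities.data_classes.sqs_event import SQSRecord

processor = BatchProcessor(event_type=EventType.SQS)


def record_handler(record: SQSRecord):
    # Raising here marks the record as a partial failure; returning marks it as processed
    return record.body


@batch_processor(record_handler=record_handler, processor=processor)
def lambda_handler(event, context):
    # Reports only the failed messages back to the event source mapping
    return processor.response()
```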