From 43d7d050228c400f479c3f2d955bd0650b2dc4cb Mon Sep 17 00:00:00 2001 From: Simon Kok Date: Thu, 24 Oct 2024 22:50:55 +0200 Subject: [PATCH] Fix jump role manager AWS Organizations API retries ## Why? When multiple accounts are bootstrapped by ADF via changes in the AWS Organizations hierarchy, the jump-role-manager could run into rate limits of the AWS Organizations API. ## What? This change will ensure that the lambda function will retry more often, using the exponential back-off and jitter built into boto3 and configured in the Step Function retry logic. --- src/lambda_codebase/jump_role_manager/main.py | 8 +++++++- src/template.yml | 5 +++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/src/lambda_codebase/jump_role_manager/main.py b/src/lambda_codebase/jump_role_manager/main.py index 92937c9b4..13f6c1789 100644 --- a/src/lambda_codebase/jump_role_manager/main.py +++ b/src/lambda_codebase/jump_role_manager/main.py @@ -32,6 +32,7 @@ from aws_xray_sdk.core import patch_all import boto3 +from botocore.config import Config from botocore.exceptions import ClientError # ADF imports @@ -79,8 +80,13 @@ / CHARS_PER_ACCOUNT_ID, ) +BOTO_ORG_CONFIG = Config( + retries={ + "max_attempts": 15, + }, +) IAM_CLIENT = boto3.client("iam") -ORGANIZATIONS_CLIENT = boto3.client("organizations") +ORGANIZATIONS_CLIENT = boto3.client("organizations", config=BOTO_ORG_CONFIG) TAGGING_CLIENT = boto3.client("resourcegroupstaggingapi") CODEPIPELINE_CLIENT = boto3.client("codepipeline") diff --git a/src/template.yml b/src/template.yml index f1118633c..8c46d8837 100644 --- a/src/template.yml +++ b/src/template.yml @@ -920,6 +920,11 @@ Resources: "TimeoutSeconds": 300, "Retry": [ { + "ErrorEquals": ["States.TaskFailed"], + "IntervalSeconds": 3, + "BackoffRate": 2, + "MaxAttempts": 10 + }, { "ErrorEquals": [ "Lambda.Unknown", "Lambda.ServiceException",