From c32711b223b36ff93cdc3f87653ea956927d00ee Mon Sep 17 00:00:00 2001 From: Ben Broderick Phillips Date: Thu, 3 Oct 2024 13:55:18 -0400 Subject: [PATCH] [stress] Sync namespace federated credentials periodically and on startup (#9063) * Watcher namespace fed cred management fixes * Update stress watcher dockerfile to use net 8 * Add back delete code, remove terminating skip * Bump cluster version to 1.29.8 --- .../stress-deployment-example/Chart.lock | 6 +- .../cluster/azure/cluster/cluster.bicep | 2 +- .../services/Stress.Watcher/Dockerfile | 4 +- .../src/NamespaceEventHandler.cs | 87 +++++++++++++++++-- .../services/Stress.Watcher/src/Program.cs | 11 +++ 5 files changed, 98 insertions(+), 12 deletions(-) diff --git a/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock index df25f68f412..d902eb841e5 100644 --- a/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock +++ b/tools/stress-cluster/chaos/examples/stress-deployment-example/Chart.lock @@ -1,6 +1,6 @@ dependencies: - name: stress-test-addons repository: https://stresstestcharts.blob.core.windows.net/helm/ - version: 0.3.2 -digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764 -generated: "2024-05-23T11:37:41.371010465-04:00" + version: 0.3.3 +digest: sha256:1cffb5ed8ea74953ab7611f9e2de2163af2c3f0918afb9928f71210da9c19a4a +generated: "2024-10-02T16:18:41.429777815-04:00" diff --git a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep index b63d3c64d84..1dbba42b87b 100644 --- a/tools/stress-cluster/cluster/azure/cluster/cluster.bicep +++ b/tools/stress-cluster/cluster/azure/cluster/cluster.bicep @@ -13,7 +13,7 @@ param updateNodes bool = false // monitoring parameters param workspaceId string -var kubernetesVersion = '1.29.4' +var kubernetesVersion = '1.29.8' var nodeResourceGroup = 
'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}' var systemAgentPool = { diff --git a/tools/stress-cluster/services/Stress.Watcher/Dockerfile b/tools/stress-cluster/services/Stress.Watcher/Dockerfile index 7a1ccabbe8b..5418963f176 100644 --- a/tools/stress-cluster/services/Stress.Watcher/Dockerfile +++ b/tools/stress-cluster/services/Stress.Watcher/Dockerfile @@ -1,8 +1,8 @@ -FROM mcr.microsoft.com/dotnet/sdk:6.0-cbl-mariner2.0 AS build +FROM mcr.microsoft.com/dotnet/sdk:8.0-cbl-mariner2.0 AS build COPY ./src /src -RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net6.0 -p:PublishSingleFile=true --self-contained +RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net8.0 -p:PublishSingleFile=true --self-contained FROM mcr.microsoft.com/azure-cli:cbl-mariner2.0 diff --git a/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs b/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs index f415121f1d8..e58bc7e8b1c 100644 --- a/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs +++ b/tools/stress-cluster/services/Stress.Watcher/src/NamespaceEventHandler.cs @@ -28,6 +28,8 @@ public class NamespaceEventHandler // Concurrent Federated Identity Credentials writes under the same managed identity are not supported private static readonly SemaphoreSlim FederatedCredentialWriteSemaphore = new(1, 1); + private Dictionary WorkloadAppCache = []; + public List WorkloadAppPool; public string WorkloadAppIssuer; @@ -62,6 +64,57 @@ public NamespaceEventHandler( .CreateLogger(); } + public async Task SyncCredentials() + { + try + { + Logger.Information($"Waiting for federated credential write semaphore"); + await FederatedCredentialWriteSemaphore.WaitAsync(); + await _syncCredentials(); + } + finally + { + Logger.Information("Releasing federated credential write semaphore"); + FederatedCredentialWriteSemaphore.Release(); + } + } + + public async Task _syncCredentials() + 
{ + Logger.Information("Syncing namespaced federated credentials, this may take a minute..."); + + var namespaces = await Client.ListNamespaceAsync(); + foreach (var app in WorkloadAppPool) + { + var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app); + var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(resourceId); + var identityResource = await userAssignedIdentity.GetAsync(); + var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials(); + await foreach (var item in fedCreds.GetAllAsync()) + { + if (!namespaces.Items.Any(ns => item.Data.Name == CreateFederatedIdentityCredentialName(ns))) + { + if (!string.IsNullOrEmpty(WatchNamespace) && item.Data.Name != CreateFederatedIdentityCredentialName(WatchNamespace)) + { + Logger.Information($"Skipping delete federated credential '{item.Data.Name}' because it is not the watched namespace '{WatchNamespace}'"); + continue; + } + // Only perform delete operations for namespace state that may have changed if the watcher was not running. + // Any create operations will be handled after initialization as the watch stream processes all active namespaces on startup + Logger.Information($"Deleting federated identity credential '{item.Data.Name}' for managed identity '{app}' as the corresponding namespace no longer exists."); + WorkloadAppCache.Remove(item.Data.Name); + var lro = await item.DeleteAsync(Azure.WaitUntil.Completed); + } + else + { + WorkloadAppCache[item.Data.Name] = identityResource.Value; + } + } + } + + Logger.Information($"Federated credential sync complete. 
Cached {WorkloadAppCache.Count} federated credentials."); + } + public async Task Watch(CancellationToken cancellationToken) { string resourceVersion = null; @@ -116,7 +169,7 @@ public async Task Watch(CancellationToken cancellationToken) public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns) { - if (ExcludedNamespaces.Contains(ns.Name())) + if (ExcludedNamespaces.Contains(ns.Name()) || string.IsNullOrEmpty(ns.Name())) { return; } @@ -156,7 +209,12 @@ public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns) public string CreateFederatedIdentityCredentialName(V1Namespace ns) { - return $"stress-{ns.Name()}"; + return CreateFederatedIdentityCredentialName(ns.Name()); + } + + public string CreateFederatedIdentityCredentialName(string ns) + { + return $"stress-{ns}"; } public async Task InitializeWorkloadIdForNamespace(V1Namespace ns) @@ -175,7 +233,8 @@ public async Task InitializeWorkloadIdForNamespace(V1Namespace ns) var identityData = await selectedWorkloadIdentity.GetAsync(); var selectedWorkloadAppId = identityData.Value.Data.ClientId.ToString(); - var meta = new V1ObjectMeta(){ + var meta = new V1ObjectMeta() + { Name = ns.Name(), NamespaceProperty = ns.Name(), Annotations = new Dictionary(){ @@ -183,6 +242,12 @@ public async Task InitializeWorkloadIdForNamespace(V1Namespace ns) } }; var serviceAccount = new V1ServiceAccount(metadata: meta); + var allAccounts = await Client.ListNamespacedServiceAccountAsync(ns.Name()); + if (allAccounts.Items.Any(sa => sa.Name() == ns.Name())) + { + Logger.Information($"Service account '{ns.Name()}/{ns.Name()}' already exists, skipping creation."); + return; + } await Client.CreateNamespacedServiceAccountAsync(serviceAccount, ns.Name()); Logger.Information($"Created service account '{ns.Name()}/{ns.Name()}' with workload client id '{selectedWorkloadAppId}'"); } @@ -200,6 +265,12 @@ public async Task CreateFederatedIdentityCredentia Logger.Information($"Waiting for federated credential write 
semaphore"); await FederatedCredentialWriteSemaphore.WaitAsync(); + if (WorkloadAppCache.ContainsKey(credentialName)) + { + Logger.Information($"Found cache entry for federated credential {credentialName}, returning identity {WorkloadAppCache[credentialName].Data.ClientId}"); + return await WorkloadAppCache[credentialName].GetAsync(); + } + foreach (var workloadApp in WorkloadAppPool) { var userAssignedIdentityResourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, workloadApp); @@ -246,6 +317,7 @@ public async Task CreateFederatedIdentityCredentia Logger.Information($"Creating/updating federated identity credential '{credentialName}' " + $"with subject '{subject}' for managed identity '{selectedWorkloadApp}'"); var lro = await federatedIdentityCredential.UpdateAsync(Azure.WaitUntil.Completed, fedCredData); + WorkloadAppCache[credentialName] = selectedIdentity; Logger.Information($"Created federated identity credential '{lro.Value.Data.Name}'"); return selectedIdentity; @@ -253,8 +325,14 @@ public async Task CreateFederatedIdentityCredentia public async Task DeleteFederatedIdentityCredential(V1Namespace ns) { + Logger.Information($"Waiting for federated credential write semaphore"); + await FederatedCredentialWriteSemaphore.WaitAsync(); + var credentialName = CreateFederatedIdentityCredentialName(ns); var workloadApp = ""; + + WorkloadAppCache.Remove(credentialName); + foreach (var app in WorkloadAppPool) { var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app); @@ -284,9 +362,6 @@ public async Task DeleteFederatedIdentityCredential(V1Namespace ns) SubscriptionId, ClusterGroup, workloadApp, credentialName); var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId); - Logger.Information($"Waiting for federated credential write semaphore"); - await FederatedCredentialWriteSemaphore.WaitAsync(); - 
Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{workloadApp}'"); var lro = await federatedIdentityCredential.DeleteAsync(Azure.WaitUntil.Completed); Logger.Information($"Deleted federated identity credential '{credentialName}'"); diff --git a/tools/stress-cluster/services/Stress.Watcher/src/Program.cs b/tools/stress-cluster/services/Stress.Watcher/src/Program.cs index 4c8550a9f52..303d790a66b 100644 --- a/tools/stress-cluster/services/Stress.Watcher/src/Program.cs +++ b/tools/stress-cluster/services/Stress.Watcher/src/Program.cs @@ -79,6 +79,8 @@ static async Task Program(Options options) var namespaceEventHandler = new NamespaceEventHandler( client, armClient, workloadConfig.SubscriptionId, workloadConfig.ClusterGroup, workloadConfig.WorkloadAppPool, workloadConfig.WorkloadAppIssuer, options.Namespace); + await namespaceEventHandler.SyncCredentials(); + _ = PollAndSyncCredentials(namespaceEventHandler, 288); // poll every 288 minutes (~4.8 hours) var cts = new CancellationTokenSource(); var taskList = new List @@ -164,5 +166,14 @@ static WorkloadAuthConfig GetWorkloadConfigValues(Options options, Boolean isLoc ClusterGroup = clusterGroup }; } + + static async Task PollAndSyncCredentials(NamespaceEventHandler namespaceHandler, int minutes) + { + while (true) + { + await Task.Delay(TimeSpan.FromMinutes(minutes)); + await namespaceHandler.SyncCredentials(); + } + } } }