Skip to content

Commit

Permalink
[stress] Sync namespace federated credentials periodically and on sta…
Browse files Browse the repository at this point in the history
…rtup (#9063)

* Watcher namespace fed cred management fixes

* Update stress watcher dockerfile to use net 8

* Add back delete code, remove terminating skip

* Bump cluster version to 1.29.8
  • Loading branch information
benbp authored Oct 3, 2024
1 parent a76f3d2 commit c32711b
Show file tree
Hide file tree
Showing 5 changed files with 98 additions and 12 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
dependencies:
- name: stress-test-addons
repository: https://stresstestcharts.blob.core.windows.net/helm/
version: 0.3.2
digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764
generated: "2024-05-23T11:37:41.371010465-04:00"
version: 0.3.3
digest: sha256:1cffb5ed8ea74953ab7611f9e2de2163af2c3f0918afb9928f71210da9c19a4a
generated: "2024-10-02T16:18:41.429777815-04:00"
2 changes: 1 addition & 1 deletion tools/stress-cluster/cluster/azure/cluster/cluster.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ param updateNodes bool = false
// monitoring parameters
param workspaceId string

var kubernetesVersion = '1.29.4'
var kubernetesVersion = '1.29.8'
var nodeResourceGroup = 'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}'

var systemAgentPool = {
Expand Down
4 changes: 2 additions & 2 deletions tools/stress-cluster/services/Stress.Watcher/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-cbl-mariner2.0 AS build
FROM mcr.microsoft.com/dotnet/sdk:8.0-cbl-mariner2.0 AS build

COPY ./src /src

RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net6.0 -p:PublishSingleFile=true --self-contained
RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net8.0 -p:PublishSingleFile=true --self-contained

FROM mcr.microsoft.com/azure-cli:cbl-mariner2.0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public class NamespaceEventHandler
// Concurrent Federated Identity Credentials writes under the same managed identity are not supported
private static readonly SemaphoreSlim FederatedCredentialWriteSemaphore = new(1, 1);

private Dictionary<string, UserAssignedIdentityResource> WorkloadAppCache = [];

public List<string> WorkloadAppPool;
public string WorkloadAppIssuer;

Expand Down Expand Up @@ -62,6 +64,57 @@ public NamespaceEventHandler(
.CreateLogger();
}

/// <summary>
/// Runs a full federated credential sync while holding the federated credential write semaphore,
/// so the sync is serialized with every other operation that takes the same semaphore.
/// </summary>
/// <returns>A task that completes once the sync has finished and the semaphore has been released.</returns>
public async Task SyncCredentials()
{
    Logger.Information($"Waiting for federated credential write semaphore");

    // Acquire before entering the try block: if the wait itself fails, the finally block
    // must not release a semaphore slot that was never taken.
    await FederatedCredentialWriteSemaphore.WaitAsync();
    try
    {
        await _syncCredentials();
    }
    finally
    {
        Logger.Information("Releasing federated credential write semaphore");
        FederatedCredentialWriteSemaphore.Release();
    }
}

/// <summary>
/// Reconciles the federated identity credentials of every managed identity in
/// <c>WorkloadAppPool</c> against the namespaces currently present in the cluster.
/// Credentials whose namespace no longer exists are deleted; credentials whose namespace
/// still exists are recorded in <c>WorkloadAppCache</c>.
/// </summary>
/// <remarks>
/// This method does not take the federated credential write semaphore itself;
/// <c>SyncCredentials</c> acquires it before calling in.
/// </remarks>
public async Task _syncCredentials()
{
    Logger.Information("Syncing namespaced federated credentials, this may take a minute...");

    var namespaces = await Client.ListNamespaceAsync();

    // Compute the expected credential names once, instead of rescanning the whole
    // namespace list for every credential of every managed identity.
    var liveCredentialNames = namespaces.Items
        .Select(ns => CreateFederatedIdentityCredentialName(ns))
        .ToHashSet();

    foreach (var app in WorkloadAppPool)
    {
        var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
        var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(resourceId);
        var identityResource = await userAssignedIdentity.GetAsync();
        var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials();

        await foreach (var item in fedCreds.GetAllAsync())
        {
            var credentialName = item.Data.Name;

            // The namespace still exists: remember which identity owns its credential.
            if (liveCredentialNames.Contains(credentialName))
            {
                WorkloadAppCache[credentialName] = identityResource.Value;
                continue;
            }

            // When scoped to a single watched namespace, never delete credentials that belong to other namespaces.
            if (!string.IsNullOrEmpty(WatchNamespace) && credentialName != CreateFederatedIdentityCredentialName(WatchNamespace))
            {
                Logger.Information($"Skipping delete federated credential '{credentialName}' because it is not the watched namespace '{WatchNamespace}'");
                continue;
            }

            // Only perform delete operations for namespace state that may have changed if the watcher was not running.
            // Any create operations will be handled after initialization as the watch stream processes all active namespaces on startup
            Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{app}' as the corresponding namespace no longer exists.");
            WorkloadAppCache.Remove(credentialName);
            await item.DeleteAsync(Azure.WaitUntil.Completed);
        }
    }

    Logger.Information($"Federated credential sync complete. Cached {WorkloadAppCache.Count} federated credentials.");
}

public async Task Watch(CancellationToken cancellationToken)
{
string resourceVersion = null;
Expand Down Expand Up @@ -116,7 +169,7 @@ public async Task Watch(CancellationToken cancellationToken)

public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)
{
if (ExcludedNamespaces.Contains(ns.Name()))
if (ExcludedNamespaces.Contains(ns.Name()) || string.IsNullOrEmpty(ns.Name()))
{
return;
}
Expand Down Expand Up @@ -156,7 +209,12 @@ public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)

/// <summary>
/// Returns the federated identity credential name for the given namespace object:
/// the namespace's name prefixed with "stress-".
/// </summary>
/// <param name="ns">Namespace whose name is used to build the credential name.</param>
/// <returns>The credential name, "stress-" followed by the namespace name.</returns>
public string CreateFederatedIdentityCredentialName(V1Namespace ns)
{
// NOTE(review): two consecutive return statements follow, so the second one is never reached
// as written. This looks like the old and new lines of a diff rendered together — confirm
// which one should remain.
return $"stress-{ns.Name()}";
return CreateFederatedIdentityCredentialName(ns.Name());
}

/// <summary>
/// Builds the federated identity credential name for a namespace name by prefixing it with "stress-".
/// </summary>
/// <param name="ns">The namespace name.</param>
/// <returns>The string "stress-" followed by <paramref name="ns"/>.</returns>
public string CreateFederatedIdentityCredentialName(string ns) => $"stress-{ns}";

public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
Expand All @@ -175,14 +233,21 @@ public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
var identityData = await selectedWorkloadIdentity.GetAsync();
var selectedWorkloadAppId = identityData.Value.Data.ClientId.ToString();

var meta = new V1ObjectMeta(){
var meta = new V1ObjectMeta()
{
Name = ns.Name(),
NamespaceProperty = ns.Name(),
Annotations = new Dictionary<string, string>(){
{ "azure.workload.identity/client-id", selectedWorkloadAppId }
}
};
var serviceAccount = new V1ServiceAccount(metadata: meta);
var allAccounts = await Client.ListNamespacedServiceAccountAsync(ns.Name());
if (allAccounts.Items.Any(sa => sa.Name() == ns.Name()))
{
Logger.Information($"Service account '{ns.Name()}/{ns.Name()}' already exists, skipping creation.");
return;
}
await Client.CreateNamespacedServiceAccountAsync(serviceAccount, ns.Name());
Logger.Information($"Created service account '{ns.Name()}/{ns.Name()}' with workload client id '{selectedWorkloadAppId}'");
}
Expand All @@ -200,6 +265,12 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

if (WorkloadAppCache.ContainsKey(credentialName))
{
Logger.Information($"Found cache entry for federated credential {credentialName}, returning identity {WorkloadAppCache[credentialName].Data.ClientId}");
return await WorkloadAppCache[credentialName].GetAsync();
}

foreach (var workloadApp in WorkloadAppPool)
{
var userAssignedIdentityResourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, workloadApp);
Expand Down Expand Up @@ -246,15 +317,22 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
Logger.Information($"Creating/updating federated identity credential '{credentialName}' " +
$"with subject '{subject}' for managed identity '{selectedWorkloadApp}'");
var lro = await federatedIdentityCredential.UpdateAsync(Azure.WaitUntil.Completed, fedCredData);
WorkloadAppCache[credentialName] = selectedIdentity;
Logger.Information($"Created federated identity credential '{lro.Value.Data.Name}'");

return selectedIdentity;
}

public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
{
Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

var credentialName = CreateFederatedIdentityCredentialName(ns);
var workloadApp = "";

WorkloadAppCache.Remove(credentialName);

foreach (var app in WorkloadAppPool)
{
var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
Expand Down Expand Up @@ -284,9 +362,6 @@ public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
SubscriptionId, ClusterGroup, workloadApp, credentialName);
var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId);

Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{workloadApp}'");
var lro = await federatedIdentityCredential.DeleteAsync(Azure.WaitUntil.Completed);
Logger.Information($"Deleted federated identity credential '{credentialName}'");
Expand Down
11 changes: 11 additions & 0 deletions tools/stress-cluster/services/Stress.Watcher/src/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ static async Task Program(Options options)
var namespaceEventHandler = new NamespaceEventHandler(
client, armClient, workloadConfig.SubscriptionId, workloadConfig.ClusterGroup,
workloadConfig.WorkloadAppPool, workloadConfig.WorkloadAppIssuer, options.Namespace);
await namespaceEventHandler.SyncCredentials();
_ = PollAndSyncCredentials(namespaceEventHandler, 288); // poll every 288 minutes (4.8 hours)

var cts = new CancellationTokenSource();
var taskList = new List<Task>
Expand Down Expand Up @@ -164,5 +166,14 @@ static WorkloadAuthConfig GetWorkloadConfigValues(Options options, Boolean isLoc
ClusterGroup = clusterGroup
};
}

/// <summary>
/// Re-runs the federated credential sync in an endless loop, waiting
/// <paramref name="minutes"/> minutes before each run.
/// </summary>
/// <param name="namespaceHandler">Handler whose <c>SyncCredentials</c> method is invoked on every iteration.</param>
/// <param name="minutes">Delay between sync runs, in minutes.</param>
/// <returns>A task that does not complete under normal operation.</returns>
static async Task PollAndSyncCredentials(NamespaceEventHandler namespaceHandler, int minutes)
{
    while (true)
    {
        await Task.Delay(TimeSpan.FromMinutes(minutes));

        try
        {
            await namespaceHandler.SyncCredentials();
        }
        catch (Exception ex)
        {
            // The returned task is discarded at the call site, so an exception escaping here
            // would fault the task unobserved and silently stop all future syncs.
            // Log the failure and keep polling instead.
            Console.Error.WriteLine($"Periodic federated credential sync failed, retrying in {minutes} minutes: {ex}");
        }
    }
}
}
}

0 comments on commit c32711b

Please sign in to comment.