Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stress] Sync namespace federated credentials periodically and on startup #9063

Merged
merged 4 commits into from
Oct 3, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
dependencies:
- name: stress-test-addons
repository: https://stresstestcharts.blob.core.windows.net/helm/
version: 0.3.2
digest: sha256:6eee71a7e8a4c0dc06d5fbbce39ef63237a0db0b7fc2da66e98e96b68985b764
generated: "2024-05-23T11:37:41.371010465-04:00"
version: 0.3.3
digest: sha256:1cffb5ed8ea74953ab7611f9e2de2163af2c3f0918afb9928f71210da9c19a4a
generated: "2024-10-02T16:18:41.429777815-04:00"
2 changes: 1 addition & 1 deletion tools/stress-cluster/cluster/azure/cluster/cluster.bicep
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ param updateNodes bool = false
// monitoring parameters
param workspaceId string

var kubernetesVersion = '1.29.4'
var kubernetesVersion = '1.29.8'
var nodeResourceGroup = 'rg-nodes-${dnsPrefix}-${clusterName}-${groupSuffix}'

var systemAgentPool = {
Expand Down
4 changes: 2 additions & 2 deletions tools/stress-cluster/services/Stress.Watcher/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
FROM mcr.microsoft.com/dotnet/sdk:6.0-cbl-mariner2.0 AS build
FROM mcr.microsoft.com/dotnet/sdk:8.0-cbl-mariner2.0 AS build

COPY ./src /src

RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net6.0 -p:PublishSingleFile=true --self-contained
RUN cd /src && dotnet publish -c Release -o /stresswatcher -r linux-x64 -f net8.0 -p:PublishSingleFile=true --self-contained

FROM mcr.microsoft.com/azure-cli:cbl-mariner2.0

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,8 @@ public class NamespaceEventHandler
// Concurrent Federated Identity Credentials writes under the same managed identity are not supported
private static readonly SemaphoreSlim FederatedCredentialWriteSemaphore = new(1, 1);

// Maps a federated identity credential name (as built by CreateFederatedIdentityCredentialName)
// to the user assigned identity resource that holds it. Entries are written by the credential
// sync and create paths and removed on the delete paths.
private Dictionary<string, UserAssignedIdentityResource> WorkloadAppCache = [];

public List<string> WorkloadAppPool;
public string WorkloadAppIssuer;

Expand Down Expand Up @@ -62,6 +64,57 @@ public NamespaceEventHandler(
.CreateLogger();
}

/// <summary>
/// Runs a federated credential sync while holding the federated credential write semaphore,
/// so the sync cannot interleave with other federated credential writes guarded by it.
/// </summary>
/// <returns>A task that completes when the sync has finished and the semaphore is released.</returns>
public async Task SyncCredentials()
{
    Logger.Information($"Waiting for federated credential write semaphore");
    // Acquire before entering the try block: if WaitAsync throws, the semaphore was never
    // taken, and releasing it in finally would either throw SemaphoreFullException or
    // hand out a permit this caller does not own.
    await FederatedCredentialWriteSemaphore.WaitAsync();
    try
    {
        await _syncCredentials();
    }
    finally
    {
        Logger.Information("Releasing federated credential write semaphore");
        FederatedCredentialWriteSemaphore.Release();
    }
}

/// <summary>
/// Reconciles the federated identity credentials of every managed identity in
/// <c>WorkloadAppPool</c> against the namespaces currently returned by the cluster.
/// Credentials whose namespace still exists are recorded in <c>WorkloadAppCache</c>;
/// credentials whose namespace is gone are removed from the cache and deleted.
/// </summary>
/// <remarks>
/// This method does not take the federated credential write semaphore itself;
/// <c>SyncCredentials</c> wraps it with the semaphore.
/// </remarks>
public async Task _syncCredentials()
{
    Logger.Information("Syncing namespaced federated credentials, this may take a minute...");

    var namespaces = await Client.ListNamespaceAsync();

    // Compute the credential names that are expected to exist once, instead of rescanning
    // the whole namespace list for every credential of every identity.
    var expectedCredentialNames = namespaces.Items
        .Select(ns => CreateFederatedIdentityCredentialName(ns))
        .ToHashSet();

    foreach (var app in WorkloadAppPool)
    {
        var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
        var userAssignedIdentity = ArmClient.GetUserAssignedIdentityResource(resourceId);
        var identityResource = await userAssignedIdentity.GetAsync();
        var fedCreds = userAssignedIdentity.GetFederatedIdentityCredentials();

        await foreach (var item in fedCreds.GetAllAsync())
        {
            if (expectedCredentialNames.Contains(item.Data.Name))
            {
                // Namespace still exists: remember which identity holds its credential.
                WorkloadAppCache[item.Data.Name] = identityResource.Value;
                continue;
            }

            // When a single namespace is being watched, leave credentials for any other namespace alone.
            if (!string.IsNullOrEmpty(WatchNamespace) && item.Data.Name != CreateFederatedIdentityCredentialName(WatchNamespace))
            {
                Logger.Information($"Skipping delete federated credential '{item.Data.Name}' because it is not the watched namespace '{WatchNamespace}'");
                continue;
            }

            // Only perform delete operations for namespace state that may have changed if the watcher was not running.
            // Any create operations will be handled after initialization as the watch stream processes all active namespaces on startup
            Logger.Information($"Deleting federated identity credential '{item.Data.Name}' for managed identity '{app}' as the corresponding namespace no longer exists.");
            WorkloadAppCache.Remove(item.Data.Name);
            await item.DeleteAsync(Azure.WaitUntil.Completed);
        }
    }

    Logger.Information($"Federated credential sync complete. Cached {WorkloadAppCache.Count} federated credentials.");
}

public async Task Watch(CancellationToken cancellationToken)
{
string resourceVersion = null;
Expand Down Expand Up @@ -116,7 +169,7 @@ public async Task Watch(CancellationToken cancellationToken)

public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)
{
if (ExcludedNamespaces.Contains(ns.Name()))
if (ExcludedNamespaces.Contains(ns.Name()) || string.IsNullOrEmpty(ns.Name()))
{
return;
}
Expand Down Expand Up @@ -156,7 +209,12 @@ public void HandleNamespaceEvent(WatchEventType eventType, V1Namespace ns)

/// <summary>
/// Builds the federated identity credential name for a namespace object by delegating to
/// the string overload with the namespace's name, so the name format is defined in one place.
/// </summary>
/// <param name="ns">The namespace whose name is used to build the credential name.</param>
/// <returns>The credential name for the namespace.</returns>
public string CreateFederatedIdentityCredentialName(V1Namespace ns)
{
    return CreateFederatedIdentityCredentialName(ns.Name());
}

/// <summary>
/// Builds the federated identity credential name for a namespace name by prefixing it with "stress-".
/// </summary>
/// <param name="ns">The namespace name.</param>
/// <returns>The string "stress-" followed by <paramref name="ns"/>.</returns>
public string CreateFederatedIdentityCredentialName(string ns) => "stress-" + ns;

public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
Expand All @@ -175,14 +233,21 @@ public async Task InitializeWorkloadIdForNamespace(V1Namespace ns)
var identityData = await selectedWorkloadIdentity.GetAsync();
var selectedWorkloadAppId = identityData.Value.Data.ClientId.ToString();

var meta = new V1ObjectMeta(){
var meta = new V1ObjectMeta()
{
Name = ns.Name(),
NamespaceProperty = ns.Name(),
Annotations = new Dictionary<string, string>(){
{ "azure.workload.identity/client-id", selectedWorkloadAppId }
}
};
var serviceAccount = new V1ServiceAccount(metadata: meta);
var allAccounts = await Client.ListNamespacedServiceAccountAsync(ns.Name());
if (allAccounts.Items.Any(sa => sa.Name() == ns.Name()))
{
Logger.Information($"Service account '{ns.Name()}/{ns.Name()}' already exists, skipping creation.");
return;
}
await Client.CreateNamespacedServiceAccountAsync(serviceAccount, ns.Name());
Logger.Information($"Created service account '{ns.Name()}/{ns.Name()}' with workload client id '{selectedWorkloadAppId}'");
}
Expand All @@ -200,6 +265,12 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

if (WorkloadAppCache.ContainsKey(credentialName))
{
Logger.Information($"Found cache entry for federated credential {credentialName}, returning identity {WorkloadAppCache[credentialName].Data.ClientId}");
return await WorkloadAppCache[credentialName].GetAsync();
}

foreach (var workloadApp in WorkloadAppPool)
{
var userAssignedIdentityResourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, workloadApp);
Expand Down Expand Up @@ -246,15 +317,22 @@ public async Task<UserAssignedIdentityResource> CreateFederatedIdentityCredentia
Logger.Information($"Creating/updating federated identity credential '{credentialName}' " +
$"with subject '{subject}' for managed identity '{selectedWorkloadApp}'");
var lro = await federatedIdentityCredential.UpdateAsync(Azure.WaitUntil.Completed, fedCredData);
WorkloadAppCache[credentialName] = selectedIdentity;
Logger.Information($"Created federated identity credential '{lro.Value.Data.Name}'");

return selectedIdentity;
}

public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
{
Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

var credentialName = CreateFederatedIdentityCredentialName(ns);
var workloadApp = "";

WorkloadAppCache.Remove(credentialName);

foreach (var app in WorkloadAppPool)
{
var resourceId = UserAssignedIdentityResource.CreateResourceIdentifier(SubscriptionId, ClusterGroup, app);
Expand Down Expand Up @@ -284,9 +362,6 @@ public async Task DeleteFederatedIdentityCredential(V1Namespace ns)
SubscriptionId, ClusterGroup, workloadApp, credentialName);
var federatedIdentityCredential = ArmClient.GetFederatedIdentityCredentialResource(federatedIdentityCredentialResourceId);

Logger.Information($"Waiting for federated credential write semaphore");
await FederatedCredentialWriteSemaphore.WaitAsync();

Logger.Information($"Deleting federated identity credential '{credentialName}' for managed identity '{workloadApp}'");
var lro = await federatedIdentityCredential.DeleteAsync(Azure.WaitUntil.Completed);
Logger.Information($"Deleted federated identity credential '{credentialName}'");
Expand Down
11 changes: 11 additions & 0 deletions tools/stress-cluster/services/Stress.Watcher/src/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ static async Task Program(Options options)
var namespaceEventHandler = new NamespaceEventHandler(
client, armClient, workloadConfig.SubscriptionId, workloadConfig.ClusterGroup,
workloadConfig.WorkloadAppPool, workloadConfig.WorkloadAppIssuer, options.Namespace);
await namespaceEventHandler.SyncCredentials();
_ = PollAndSyncCredentials(namespaceEventHandler, 288); // poll every 288 minutes (~4.8 hours; use 720 for 12 hours)

var cts = new CancellationTokenSource();
var taskList = new List<Task>
Expand Down Expand Up @@ -164,5 +166,14 @@ static WorkloadAuthConfig GetWorkloadConfigValues(Options options, Boolean isLoc
ClusterGroup = clusterGroup
};
}

/// <summary>
/// Repeatedly waits <paramref name="minutes"/> minutes and then runs
/// <c>SyncCredentials</c> on the handler. The loop has no exit, so the returned task
/// does not complete normally.
/// </summary>
/// <param name="namespaceHandler">Handler whose credentials are synced on every iteration.</param>
/// <param name="minutes">Delay, in minutes, before each sync.</param>
static async Task PollAndSyncCredentials(NamespaceEventHandler namespaceHandler, int minutes)
{
    var interval = TimeSpan.FromMinutes(minutes);
    while (true)
    {
        await Task.Delay(interval);
        try
        {
            await namespaceHandler.SyncCredentials();
        }
        catch (Exception ex)
        {
            // The returned task is discarded by its caller, so an exception escaping here would
            // end periodic syncing without ever being observed. Report it and try again after
            // the next interval instead.
            Console.Error.WriteLine($"Periodic federated credential sync failed, retrying in {minutes} minutes: {ex}");
        }
    }
}
}
}