diff --git a/src/connectedk8s/HISTORY.rst b/src/connectedk8s/HISTORY.rst index 02f4ed9b2e6..1da3937e2e6 100644 --- a/src/connectedk8s/HISTORY.rst +++ b/src/connectedk8s/HISTORY.rst @@ -6,6 +6,8 @@ Release History 1.9.0 ++++++ * Added support for arc gateway feature. +* Bug fix for Workload Identity poll or agent state +* Bug fix for connectedk8s update with workload identity enabled 1.8.0 ++++++ diff --git a/src/connectedk8s/azext_connectedk8s/_constants.py b/src/connectedk8s/azext_connectedk8s/_constants.py index 4d26bce9176..e5922f70d1f 100644 --- a/src/connectedk8s/azext_connectedk8s/_constants.py +++ b/src/connectedk8s/azext_connectedk8s/_constants.py @@ -70,6 +70,9 @@ Delete_Kubernetes_Helm_Release_Namespace_Fault_Type = 'kubernetes-delete-helm-release-namespace-error' Update_Agent_Success = 'Agents for Connected Cluster {} have been updated successfully' Update_Agent_Failure = 'Error while updating agents. Please run \"kubectl get pods -n azure-arc\" to check the pods in case of timeout error. Error: {}' +Agent_State_Succeeded = "Succeeded" +Agent_State_Failed = "Failed" +Agent_State_Timeout = 15 Get_Credentials_Failed_Fault_Type = 'failed-to-get-list-cluster-user-credentials' Failed_To_Merge_Credentials_Fault_Type = "failed-to-merge-credentials" Kubeconfig_Failed_To_Load_Fault_Type = "failed-to-load-kubeconfig-file" diff --git a/src/connectedk8s/azext_connectedk8s/custom.py b/src/connectedk8s/azext_connectedk8s/custom.py index b24e4a7ed30..545bae82eb7 100644 --- a/src/connectedk8s/azext_connectedk8s/custom.py +++ b/src/connectedk8s/azext_connectedk8s/custom.py @@ -522,42 +522,42 @@ def create_connectedk8s(cmd, client, resource_group_name, cluster_name, correlat values_file, azure_cloud, enable_custom_locations, custom_locations_oid, helm_client_location, enable_private_link, arm_metadata, onboarding_timeout, helm_content_values) - # Add 2nd long running operation to wait for Agent State to reach terminal stage with a default 20 minute timeout window - # TODO: Update poll_for_agent_state method to check for agent state and not provisioning state when feedback loop is implemented - if enable_oidc_issuer or enable_workload_identity: - print("Waiting for Agent State to reach terminal state") - if not poll_for_agent_state(cmd, resource_group_name, cluster_name): - print("Timed out waiting for Agent State to reach terminal state") - return False - - return put_cc_response - - -def poll_for_agent_state(cmd, resource_group_name, cluster_name, timeout_minutes=20, interval=5): """ - Polls for agent state to reach a terminal state with a timeout. - - Args: - resource_group_name: Name of cluster's resource group - cluster_name: Name of cluster resource - timeout_minutes (int, optional): Timeout in minutes. Defaults to 10 minutes. - interval (float, optional): Time interval between attempts in seconds. Defaults to 1. + Long Running Operation for Agent State + Agent state is used for feedback of workload identity extension installation + Cases for when to poll for agent state: + - If OIDC is enabled and self hosted issuer is passed in, extension is not installed. + Feedback loop is not enabled, do not poll for agent state. + - If OIDC is enabled and self hosted issuer is empty, extension is installed. + Need to poll for agent state. + - If workload identity is enabled, extension is installed, poll for agent state. """ + if (enable_oidc_issuer and self_hosted_issuer == "") or enable_workload_identity: + print("Step: {}: Wait for Agent State to reach terminal state, with timeout of {}".format(utils.get_utctimestring(), + consts.Agent_State_Timeout)) + if poll_for_agent_state(cmd, resource_group_name, cluster_name): + connected_cluster = get_connectedk8s_2024_07_01(cmd, resource_group_name, cluster_name) + print("Step: {}: Agent state has reached terminal state of {}.".format(utils.get_utctimestring(), + connected_cluster.arc_agent_profile.agent_state)) + else: + raise CLIInternalError("Timed out waiting for Agent State to reach terminal state.") + + return put_cc_response + +def poll_for_agent_state(cmd, resource_group_name, cluster_name, timeout_minutes=consts.Agent_State_Timeout, + interval=5): start_time = time.time() while True: connected_cluster = get_connectedk8s_2024_07_01(cmd, resource_group_name, cluster_name) - # Checking for provisioning state now as agent state feedback loop is not implemented - if connected_cluster.provisioning_state == "Succeeded" or connected_cluster.provisioning_state == "Failed": - print("Agent state has reached terminal state of: " + connected_cluster.provisioning_state) + if connected_cluster.arc_agent_profile.agent_state == consts.Agent_State_Succeeded or \ + connected_cluster.arc_agent_profile.agent_state == consts.Agent_State_Failed: return True elapsed_time = time.time() - start_time if elapsed_time >= timeout_minutes * 60: - print("Agent state has not reached terminal state within " + timeout_minutes + " minute timeout window: " + connected_cluster.provisioning_state) return False time.sleep(interval) - def validate_existing_provisioned_cluster_for_reput(cluster_resource, kubernetes_distro, kubernetes_infra, enable_private_link, private_link_scope_resource_id, distribution_version, azure_hybrid_benefit, location): @@ -897,24 +897,20 @@ def check_arm64_node(api_response): def set_oidc_issuer_profile(enable_oidc_issuer, self_hosted_issuer=""): - oidc_profile = None - if enable_oidc_issuer: - oidc_profile = OidcIssuerProfile( - enabled=True - ) - if self_hosted_issuer != "": - oidc_profile.self_hosted_issuer_url = self_hosted_issuer + oidc_profile = OidcIssuerProfile( + enabled=enable_oidc_issuer + ) + if self_hosted_issuer != "": + oidc_profile.self_hosted_issuer_url = self_hosted_issuer return oidc_profile def set_security_profile(enable_workload_identity): - security_profile = None - if enable_workload_identity: - security_profile = SecurityProfile( - workload_identity=SecurityProfileWorkloadIdentity( - enabled=True - ) + security_profile = SecurityProfile( + workload_identity=SecurityProfileWorkloadIdentity( + enabled=enable_workload_identity ) + ) return security_profile @@ -958,12 +954,13 @@ def generate_request_payload(location, public_key, tags, kubernetes_distro, kube if enable_private_link is not None or distribution_version is not None or azure_hybrid_benefit is not None or enable_oidc_issuer or enable_workload_identity or gateway is not None or arc_agentry_configurations is not None or arc_agent_profile is not None: # Set additional parameters - private_link_state = None + private_link_state, oidc_issuer, security_profile = None, None, None if enable_private_link is not None: private_link_state = "Enabled" if enable_private_link is True else "Disabled" - - oidc_profile = set_oidc_issuer_profile(enable_oidc_issuer, self_hosted_issuer) - security_profile = set_security_profile(enable_workload_identity) + if enable_oidc_issuer: + oidc_issuer = set_oidc_issuer_profile(enable_oidc_issuer, self_hosted_issuer) + if enable_workload_identity: + security_profile = set_security_profile(enable_workload_identity) cc = ConnectedCluster2024_07_01_Preview( location=location, @@ -976,11 +973,11 @@ def generate_request_payload(location, public_key, tags, kubernetes_distro, kube private_link_state=private_link_state, azure_hybrid_benefit=azure_hybrid_benefit, distribution_version=distribution_version, - oidc_issuer_profile=oidc_profile, - security_profile=security_profile, arc_agent_profile=arc_agent_profile, gateway=gateway, - arc_agentry_configurations=arc_agentry_configurations + arc_agentry_configurations=arc_agentry_configurations, + oidc_issuer_profile=oidc_issuer, + security_profile=security_profile ) return cc @@ -989,12 +986,10 @@ def generate_request_payload(location, public_key, tags, kubernetes_distro, kube def generate_reput_request_payload(cc, enable_oidc_issuer, enable_workload_identity, self_hosted_issuer, gateway, arc_agentry_configurations, arc_agent_profile): # Update connected cluster resource object if enable_oidc_issuer is not None: - oidc_profile = set_oidc_issuer_profile(enable_oidc_issuer, self_hosted_issuer) - cc.oidc_issuer_profile = oidc_profile + cc.oidc_issuer_profile = set_oidc_issuer_profile(enable_oidc_issuer, self_hosted_issuer) if enable_workload_identity is not None: - security_profile = set_security_profile(enable_workload_identity) - cc.security_profile = security_profile + cc.security_profile = set_security_profile(enable_workload_identity) if gateway is not None: cc.gateway = gateway @@ -1478,14 +1473,25 @@ def update_connected_cluster(cmd, client, resource_group_name, cluster_name, htt if not arm_properties_unset: return patch_cc_response - # Add 2nd long running operation to wait for Agent State to reach terminal stage with a default 20 minute timeout window - # TODO: Update poll_for_agent_state method to check for agent state and not provisioning state when feedback loop is implemented - # This condition will be hit after provisioning state - if enable_oidc_issuer or enable_workload_identity: - print("Hold for Agent State to reach terminal state") - if not poll_for_agent_state(cmd, resource_group_name, cluster_name): - print("Timed out waiting for Agent State to reach terminal state") - return False + """ + Long Running Operation for Agent State + Agent state is used for feedback of workload identity extension installation + Cases for when to poll for agent state: + - If OIDC is enabled and self hosted issuer is passed in, extension is not installed. + Feedback loop is not enabled, do not poll for agent state. + - If OIDC is enabled and self hosted issuer is empty, extension is installed. + Need to poll for agent state. + - If workload identity is enabled, extension is installed, poll for agent state. + """ + if (enable_oidc_issuer and self_hosted_issuer == "") or enable_workload_identity: + print("Step: {}: Wait for Agent State to reach terminal state, with timeout of {}".format(utils.get_utctimestring(), + consts.Agent_State_Timeout)) + if poll_for_agent_state(cmd, resource_group_name, cluster_name): + connected_cluster = get_connectedk8s_2024_07_01(cmd, resource_group_name, cluster_name) + print("Step: {}: Agent state has reached terminal state of {}.".format(utils.get_utctimestring(), + connected_cluster.arc_agent_profile.agent_state)) + else: + raise CLIInternalError("Timed out waiting for Agent State to reach terminal state.") return reput_cc_response