From 1d0ee782db5cb87d329a1d91dd86c2f826a9d249 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 14 Apr 2022 15:57:26 +0800 Subject: [PATCH 01/18] remove relay --- src/k8s-extension/HISTORY.rst | 2 +- .../azext_k8s_extension/custom.py | 1 + .../partner_extensions/AzureMLKubernetes.py | 46 +++++++++++++++---- .../partner_extensions/DefaultExtension.py | 1 + .../PartnerExtensionModel.py | 1 + 5 files changed, 40 insertions(+), 11 deletions(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index 58a0fb647e0..3dafa54e9ca 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -6,7 +6,7 @@ Release History 1.1.0 ++++++++++++++++++ * Migrate Extensions api-version to 2022-03-01 -* microsoft.azureml.kubernetes: Remove inference private review warning message +* microsoft.azureml.kubernetes: Remove inference private review warning message, disable service bus by default * microsoft.openservicemesh: Enable System-assigned identity 1.0.4 diff --git a/src/k8s-extension/azext_k8s_extension/custom.py b/src/k8s-extension/azext_k8s_extension/custom.py index eb0955dabe8..e0c7d62e3b7 100644 --- a/src/k8s-extension/azext_k8s_extension/custom.py +++ b/src/k8s-extension/azext_k8s_extension/custom.py @@ -296,6 +296,7 @@ def update_k8s_extension( version, config_settings, config_protected_settings, + extension, yes, ) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index ac71251ecbd..30b02ba5122 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -66,6 +66,10 @@ def __init__(self): self.RELAY_SERVER_CONNECTION_STRING = 'relayServerConnectionString' # create relay connection string if None self.SERVICE_BUS_CONNECTION_STRING = 'serviceBusConnectionString' # create service bus if None 
self.LOG_ANALYTICS_WS_ENABLED = 'logAnalyticsWS' # create log analytics workspace if true + # default to false when creating the extension + self.SERVICE_BUS_ENABLED = 'servicebus.enabled' + # default to false if cluster is AKS when creating the extension + self.RELAY_SERVER_ENABLED = 'relayserver.enabled' # constants for azure resources creation self.RELAY_HC_AUTH_NAME = 'azureml_rw' @@ -107,8 +111,8 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t if scope == 'namespace': raise InvalidArgumentValueError("Invalid scope '{}'. This extension can't be installed " "only at 'cluster' scope.".format(scope)) - if not release_namespace: - release_namespace = self.DEFAULT_RELEASE_NAMESPACE + # set release name explicitly to azureml + release_namespace = self.DEFAULT_RELEASE_NAMESPACE scope_cluster = ScopeCluster(release_namespace=release_namespace) ext_scope = Scope(cluster=scope_cluster, namespace=None) @@ -126,9 +130,17 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t resource = resources.get_by_id( cluster_resource_id, parent_api_version) cluster_location = resource.location.lower() - if resource.properties['totalNodeCount'] == 1 or resource.properties['totalNodeCount'] == 2: - configuration_settings['clusterPurpose'] = 'DevTest' - if resource.properties['distribution'].lower() == "openshift": + # TODO(do not merge): for testing purpose only, do not merge it. 
+ if cluster_type == "connectedClusters": + if resource.properties.get('totalNodeCount', 0) < 3: + configuration_settings['clusterPurpose'] = 'DevTest' + else: + total_node_count = 0 + for agent_pool in resource.properties.get('agentPoolProfiles', []): + total_node_count += agent_pool.get('count', 0) + if total_node_count < 3: + configuration_settings['clusterPurpose'] = 'DevTest' + if resource.properties.get('distribution', '').lower() == "openshift": configuration_settings[self.OPEN_SHIFT] = "true" except CloudError as ex: raise ex @@ -142,6 +154,16 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t self.JOB_SCHEDULER_LOCATION_KEY, cluster_location) configuration_settings[self.CLUSTER_NAME_FRIENDLY_KEY] = configuration_settings.get( self.CLUSTER_NAME_FRIENDLY_KEY, cluster_name) + # do not enable service bus by default + configuration_settings[self.SERVICE_BUS_ENABLED] = configuration_settings.get(self.SERVICE_BUS_ENABLED, 'false') + + # do not enable relay for managed cluster(AKS) by default + if cluster_type == "managedClusters": + configuration_settings[self.RELAY_SERVER_ENABLED] = configuration_settings.get(self.RELAY_SERVER_ENABLED, + 'false') + else: + configuration_settings[self.RELAY_SERVER_ENABLED] = configuration_settings.get(self.RELAY_SERVER_ENABLED, + 'true') # create Azure resources need by the extension based on the config. 
self.__create_required_resource( @@ -178,7 +200,7 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t user_confirmation_factory(cmd, yes) def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_version, release_train, version, configuration_settings, - configuration_protected_settings, yes=False): + configuration_protected_settings, original_extension, yes=False): self.__normalize_config(configuration_settings, configuration_protected_settings) # Prompt message to ask customer to confirm again @@ -273,7 +295,8 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers except azure.core.exceptions.HttpResponseError: logger.info("Failed to get log analytics connection string.") - if self.RELAY_SERVER_CONNECTION_STRING not in configuration_protected_settings: + if original_extension.configuration_settings.get(self.RELAY_SERVER_ENABLED).lower() != 'false' \ + and self.RELAY_SERVER_CONNECTION_STRING not in configuration_protected_settings: try: relay_connection_string, _, _ = _get_relay_connection_str( cmd, subscription_id, resource_group_name, cluster_name, '', self.RELAY_HC_AUTH_NAME, True) @@ -284,7 +307,8 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers raise ResourceNotFoundError("Relay server not found.") from ex raise AzureResponseError("Failed to get relay connection string.") from ex - if self.SERVICE_BUS_CONNECTION_STRING not in configuration_protected_settings: + if original_extension.configuration_settings.get(self.SERVICE_BUS_ENABLED).lower() != 'false' \ + and self.SERVICE_BUS_CONNECTION_STRING not in configuration_protected_settings: try: service_bus_connection_string, _ = _get_service_bus_connection_string( cmd, subscription_id, resource_group_name, cluster_name, '', {}, True) @@ -486,7 +510,8 @@ def __create_required_resource( configuration_settings[self.AZURE_LOG_ANALYTICS_CUSTOMER_ID_KEY] = ws_costumer_id 
configuration_protected_settings[self.AZURE_LOG_ANALYTICS_CONNECTION_STRING] = shared_key - if not configuration_settings.get(self.RELAY_SERVER_CONNECTION_STRING) and \ + if str(configuration_settings.get(self.RELAY_SERVER_ENABLED)).lower() != 'false' and \ + not configuration_settings.get(self.RELAY_SERVER_CONNECTION_STRING) and \ not configuration_protected_settings.get(self.RELAY_SERVER_CONNECTION_STRING): logger.info('==== BEGIN RELAY CREATION ====') relay_connection_string, hc_resource_id, hc_name = _get_relay_connection_str( @@ -496,7 +521,8 @@ def __create_required_resource( configuration_settings[self.HC_RESOURCE_ID_KEY] = hc_resource_id configuration_settings[self.RELAY_HC_NAME_KEY] = hc_name - if not configuration_settings.get(self.SERVICE_BUS_CONNECTION_STRING) and \ + if str(configuration_settings.get(self.SERVICE_BUS_ENABLED)).lower() != 'false' and \ + not configuration_settings.get(self.SERVICE_BUS_CONNECTION_STRING) and \ not configuration_protected_settings.get(self.SERVICE_BUS_CONNECTION_STRING): logger.info('==== BEGIN SERVICE BUS CREATION ====') topic_sub_mapping = { diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/DefaultExtension.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/DefaultExtension.py index a15defc72d2..bae3fe4d1f0 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/DefaultExtension.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/DefaultExtension.py @@ -71,6 +71,7 @@ def Update( version, configuration_settings, configuration_protected_settings, + original_extension: Extension, yes=False, ): """Default validations & defaults for Update diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/PartnerExtensionModel.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/PartnerExtensionModel.py index d4f1eeba6c3..c83429e2cfb 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/PartnerExtensionModel.py +++ 
b/src/k8s-extension/azext_k8s_extension/partner_extensions/PartnerExtensionModel.py @@ -43,6 +43,7 @@ def Update( version: str, configuration_settings: dict, configuration_protected_settings: dict, + original_extension: Extension, yes: bool, ) -> PatchExtension: pass From 05655de37dfca11a3f47f1f234e4240325df115d Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Mon, 9 May 2022 11:37:01 +0800 Subject: [PATCH 02/18] disable service bus by default, disable relay if target cluster is managed clusters. --- .../partner_extensions/AzureMLKubernetes.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index ed541ded8d0..eb45aa94470 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -57,6 +57,7 @@ def __init__(self): self.AZURE_LOG_ANALYTICS_CONNECTION_STRING = 'azure_log_analytics.connection_string' self.JOB_SCHEDULER_LOCATION_KEY = 'jobSchedulerLocation' self.CLUSTER_NAME_FRIENDLY_KEY = 'cluster_name_friendly' + self.NGINX_INGRESS_ENABLED_KEY = 'nginxIngress.enabled' # component flag self.ENABLE_TRAINING = 'enableTraining' @@ -134,12 +135,16 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t if cluster_type.lower() == 'connectedclusters': if resource.properties['totalNodeCount'] < 3: configuration_settings['clusterPurpose'] = 'DevTest' + configuration_settings[self.NGINX_INGRESS_ENABLED_KEY] = configuration_settings.get( + self.NGINX_INGRESS_ENABLED_KEY, 'false') if cluster_type.lower() == 'managedclusters': nodeCount = 0 for agent in resource.properties['agentPoolProfiles']: nodeCount += agent['count'] if nodeCount < 3: configuration_settings['clusterPurpose'] = 'DevTest' + if resource.properties.get('distribution', '').lower() == self.OPEN_SHIFT: + 
configuration_settings[self.OPEN_SHIFT] = 'true' except: pass except CloudError as ex: From ada52e50adce6bd296b12ff0528940cb61b4e0f2 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Mon, 9 May 2022 11:45:59 +0800 Subject: [PATCH 03/18] move nginx ingress logic --- .../partner_extensions/AzureMLKubernetes.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index eb45aa94470..7abbd7dd9ef 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -135,8 +135,6 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t if cluster_type.lower() == 'connectedclusters': if resource.properties['totalNodeCount'] < 3: configuration_settings['clusterPurpose'] = 'DevTest' - configuration_settings[self.NGINX_INGRESS_ENABLED_KEY] = configuration_settings.get( - self.NGINX_INGRESS_ENABLED_KEY, 'false') if cluster_type.lower() == 'managedclusters': nodeCount = 0 for agent in resource.properties['agentPoolProfiles']: @@ -162,13 +160,17 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t # do not enable service bus by default configuration_settings[self.SERVICE_BUS_ENABLED] = configuration_settings.get(self.SERVICE_BUS_ENABLED, 'false') - # do not enable relay for managed cluster(AKS) by default + # do not enable relay for managed cluster(AKS) by default, do not enable nginx for ARC by default if cluster_type == "managedClusters": configuration_settings[self.RELAY_SERVER_ENABLED] = configuration_settings.get(self.RELAY_SERVER_ENABLED, 'false') + configuration_settings[self.NGINX_INGRESS_ENABLED_KEY] = configuration_settings.get( + self.NGINX_INGRESS_ENABLED_KEY, 'true') else: configuration_settings[self.RELAY_SERVER_ENABLED] = 
configuration_settings.get(self.RELAY_SERVER_ENABLED, 'true') + configuration_settings[self.NGINX_INGRESS_ENABLED_KEY] = configuration_settings.get( + self.NGINX_INGRESS_ENABLED_KEY, 'false') # create Azure resources need by the extension based on the config. self.__create_required_resource( From dd9966b672f9cd36710fd2e1df8fb18a75a27d29 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Mon, 9 May 2022 12:48:55 +0800 Subject: [PATCH 04/18] remove service bus in tests --- testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 | 3 --- 1 file changed, 3 deletions(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index 1b3b6bb662d..0914b1d8f60 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -58,11 +58,8 @@ Describe 'AzureML Kubernetes Testing' { It "Deletes the extension from the cluster with inference enabled" { # cleanup the relay and servicebus $relayResourceID = Get-ExtensionConfigurationSettings $extensionName $relayResourceIDKey - $serviceBusResourceID = Get-ExtensionConfigurationSettings $extensionName $serviceBusResourceIDKey $relayNamespaceName = $relayResourceID.split("/")[8] - $serviceBusNamespaceName = $serviceBusResourceID.split("/")[8] az relay namespace delete --resource-group $ENVCONFIG.resourceGroup --name $relayNamespaceName - az servicebus namespace delete --resource-group $ENVCONFIG.resourceGroup --name $serviceBusNamespaceName $output = az $Env:K8sExtensionName delete -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --force $? 
| Should -BeTrue From 8dde9d8c9c491d8bda39284094ec32f47eb4f8dd Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 10:23:47 +0800 Subject: [PATCH 05/18] update HISTORY --- src/k8s-extension/HISTORY.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index f95d84e5971..ba672c87422 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -6,7 +6,7 @@ Release History 1.2.1 ++++++++++++++++++ * Provide no default values for Patch of Extension -* microsoft.azureml.kubernetes: clusterip +* microsoft.azureml.kubernetes: clusterip, disable service bus by default, do not create relay for managed clusters. 1.2.0 ++++++++++++++++++ @@ -18,7 +18,7 @@ Release History 1.1.0 ++++++++++++++++++ * Migrate Extensions api-version to 2022-03-01 -* microsoft.azureml.kubernetes: Remove inference private review warning message, disable service bus by default +* microsoft.azureml.kubernetes: Remove inference private review warning message * microsoft.openservicemesh: Enable System-assigned identity 1.0.4 From 4df242b7349b75a812c2878c056e934a988a6404 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 11:12:42 +0800 Subject: [PATCH 06/18] If no default values provided for update, assign an empty dict. 
--- .../partner_extensions/AzureMLKubernetes.py | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index 7abbd7dd9ef..76f458b12e6 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -208,6 +208,13 @@ def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_t def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_version, release_train, version, configuration_settings, configuration_protected_settings, original_extension, yes=False): + input_configuration_settings = copy.deepcopy(configuration_settings) + input_configuration_protected_settings = copy.deepcopy(configuration_protected_settings) + # configuration_settings and configuration_protected_settings can be none, so need to set them to empty dict + if configuration_settings is None: + configuration_settings = {} + if configuration_protected_settings is None: + configuration_protected_settings = {} self.__normalize_config(configuration_settings, configuration_protected_settings) # Prompt message to ask customer to confirm again @@ -337,6 +344,13 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers if fe_ssl_cert_file and fe_ssl_key_file: self.__set_inference_ssl_from_file(configuration_protected_settings, fe_ssl_cert_file, fe_ssl_key_file) + # if no entries are existed in configuration_protected_settings, configuration_settings, return whatever passed + # in the Update function(empty dict or None). 
+ if len(configuration_settings) == 0: + configuration_settings = input_configuration_settings + if len(configuration_protected_settings) == 0: + configuration_protected_settings = input_configuration_protected_settings + return PatchExtension(auto_upgrade_minor_version=auto_upgrade_minor_version, release_train=release_train, version=version, @@ -701,9 +715,11 @@ def _dereference(ref_mapping_dict: Dict[str, List], output_dict: Dict[str, Any]) def _get_value_from_config_protected_config(key, config, protected_config): - if key in config: + if config is not None and key in config: return config[key] - return protected_config.get(key) + if protected_config is not None: + return protected_config.get(key) + return None def _check_nodeselector_existed(configuration_settings, configuration_protected_settings): From bb718f760d805db7a2ccd0da48bff15e651a571f Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 13:02:30 +0800 Subject: [PATCH 07/18] set original_extension_config_settings to empty dict if it is None. 
--- .../partner_extensions/AzureMLKubernetes.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index 76f458b12e6..2385bef1ea8 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -309,7 +309,10 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers except azure.core.exceptions.HttpResponseError: logger.info("Failed to get log analytics connection string.") - if original_extension.configuration_settings.get(self.RELAY_SERVER_ENABLED).lower() != 'false' \ + original_extension_config_settings = original_extension.configuration_settings + if original_extension_config_settings is None: + original_extension_config_settings = {} + if original_extension_config_settings.get(self.RELAY_SERVER_ENABLED).lower() != 'false' \ and self.RELAY_SERVER_CONNECTION_STRING not in configuration_protected_settings: try: relay_connection_string, _, _ = _get_relay_connection_str( @@ -321,7 +324,7 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers raise ResourceNotFoundError("Relay server not found.") from ex raise AzureResponseError("Failed to get relay connection string.") from ex - if original_extension.configuration_settings.get(self.SERVICE_BUS_ENABLED).lower() != 'false' \ + if original_extension_config_settings.get(self.SERVICE_BUS_ENABLED).lower() != 'false' \ and self.SERVICE_BUS_CONNECTION_STRING not in configuration_protected_settings: try: service_bus_connection_string, _ = _get_service_bus_connection_string( From cd06fe7c5cff0fc3b05eb8e7938cc78160364c92 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 14:36:33 +0800 Subject: [PATCH 08/18] bump version to 1.2.2 --- src/k8s-extension/HISTORY.rst | 4 ++++ 
src/k8s-extension/setup.py | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index ba672c87422..f7f9426206f 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -3,6 +3,10 @@ Release History =============== +1.2.2 +++++++++++++++++++ +* microsoft.azureml.kubernetes: disable service bus by default, do not create relay for managed clusters. + 1.2.1 ++++++++++++++++++ * Provide no default values for Patch of Extension diff --git a/src/k8s-extension/setup.py b/src/k8s-extension/setup.py index d9814e8c781..b0c0c1f34d7 100644 --- a/src/k8s-extension/setup.py +++ b/src/k8s-extension/setup.py @@ -33,7 +33,7 @@ # TODO: Add any additional SDK dependencies here DEPENDENCIES = [] -VERSION = "1.2.1" +VERSION = "1.2.2" with open("README.rst", "r", encoding="utf-8") as f: README = f.read() From 6e7a5715a6ba0902b62fccd05969f48532ec4f8c Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 14:48:54 +0800 Subject: [PATCH 09/18] update HISTORY --- src/k8s-extension/HISTORY.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/k8s-extension/HISTORY.rst b/src/k8s-extension/HISTORY.rst index f7f9426206f..d738bdcf21c 100644 --- a/src/k8s-extension/HISTORY.rst +++ b/src/k8s-extension/HISTORY.rst @@ -10,7 +10,7 @@ Release History 1.2.1 ++++++++++++++++++ * Provide no default values for Patch of Extension -* microsoft.azureml.kubernetes: clusterip, disable service bus by default, do not create relay for managed clusters. +* microsoft.azureml.kubernetes: clusterip 1.2.0 ++++++++++++++++++ From 721adf1871ad6ce1230d98f245b6967e2397727e Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Wed, 11 May 2022 16:28:25 +0800 Subject: [PATCH 10/18] add tsg. 
--- .../partner_extensions/AzureMLKubernetes.py | 21 ++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index 2385bef1ea8..56f0248b818 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -111,7 +111,8 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t configuration_settings_file, configuration_protected_settings_file): if scope == 'namespace': raise InvalidArgumentValueError("Invalid scope '{}'. This extension can't be installed " - "only at 'cluster' scope.".format(scope)) + "only at 'cluster' scope. " + "Check https://aka.ms/arcmltsg for more information.".format(scope)) # set release name explicitly to azureml release_namespace = self.DEFAULT_RELEASE_NAMESPACE scope_cluster = ScopeCluster(release_namespace=release_namespace) @@ -321,8 +322,10 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers logger.info("Get relay connection string succeeded.") except azure.mgmt.relay.models.ErrorResponseException as ex: if ex.response.status_code == 404: - raise ResourceNotFoundError("Relay server not found.") from ex - raise AzureResponseError("Failed to get relay connection string.") from ex + raise ResourceNotFoundError("Relay server not found. " + "Check https://aka.ms/arcmltsg for more information.") from ex + raise AzureResponseError("Failed to get relay connection string." 
+ "Check https://aka.ms/arcmltsg for more information.") from ex if original_extension_config_settings.get(self.SERVICE_BUS_ENABLED).lower() != 'false' \ and self.SERVICE_BUS_CONNECTION_STRING not in configuration_protected_settings: @@ -333,8 +336,10 @@ def Update(self, cmd, resource_group_name, cluster_name, auto_upgrade_minor_vers logger.info("Get service bus connection string succeeded.") except azure.core.exceptions.HttpResponseError as ex: if ex.response.status_code == 404: - raise ResourceNotFoundError("Service bus not found.") from ex - raise AzureResponseError("Failed to get service bus connection string.") from ex + raise ResourceNotFoundError("Service bus not found." + "Check https://aka.ms/arcmltsg for more information.") from ex + raise AzureResponseError("Failed to get service bus connection string." + "Check https://aka.ms/arcmltsg for more information.") from ex configuration_protected_settings = _dereference(self.reference_mapping, configuration_protected_settings) @@ -399,7 +404,8 @@ def __validate_config(self, configuration_settings, configuration_protected_sett for key in dup_keys: logger.warning( 'Duplicate keys found in both configuration settings and configuration protected setttings: %s', key) - raise InvalidArgumentValueError("Duplicate keys found.") + raise InvalidArgumentValueError("Duplicate keys found." + "Check https://aka.ms/arcmltsg for more information.") enable_training = _get_value_from_config_protected_config( self.ENABLE_TRAINING, configuration_settings, configuration_protected_settings) @@ -477,7 +483,8 @@ def __validate_scoring_fe_settings(self, configuration_settings, configuration_p if feIsNodePort and feIsInternalLoadBalancer: raise MutuallyExclusiveArgumentError( - "When using nodePort as inferenceRouterServiceType, no need to specify internalLoadBalancerProvider.") + "When using nodePort as inferenceRouterServiceType, no need to specify internalLoadBalancerProvider." 
+ "Check https://aka.ms/arcmltsg for more information.") if feIsNodePort: configuration_settings['scoringFe.serviceType.nodePort'] = feIsNodePort elif feIsInternalLoadBalancer: From e9e6081f4202be72aa2bde111ed0c4b713f008f1 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 12 May 2022 11:01:18 +0800 Subject: [PATCH 11/18] change version back; add tests for update scenario --- src/k8s-extension/setup.py | 2 +- .../public/AzureMLKubernetes.Tests.ps1 | 22 +++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/src/k8s-extension/setup.py b/src/k8s-extension/setup.py index b0c0c1f34d7..d9814e8c781 100644 --- a/src/k8s-extension/setup.py +++ b/src/k8s-extension/setup.py @@ -33,7 +33,7 @@ # TODO: Add any additional SDK dependencies here DEPENDENCIES = [] -VERSION = "1.2.2" +VERSION = "1.2.1" with open("README.rst", "r", encoding="utf-8") as f: README = f.read() diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index 0914b1d8f60..4aa764b3460 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -5,6 +5,8 @@ Describe 'AzureML Kubernetes Testing' { $extensionAgentNamespace = "azureml" $relayResourceIDKey = "relayserver.hybridConnectionResourceID" $serviceBusResourceIDKey = "servicebus.resourceID" + $mockUpdateKey = "mockTest" + $mockProtectedUpdateKey = "mockProtectedTest" . $PSScriptRoot/../../helper/Constants.ps1 . $PSScriptRoot/../../helper/Helper.ps1 @@ -55,6 +57,26 @@ Describe 'AzureML Kubernetes Testing' { $extensionExists | Should -Not -BeNullOrEmpty } + It "Perform Update extension" { + az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true $($mockUpdateKey)=true --config-protected $($mockProtectedUpdateKey)=true --no-wait + $? 
| Should -BeTrue + + # Loop and retry until the extension updated + $n = 0 + do + { + if (Get-ExtensionStatus $extensionName -eq $SUCCESS_MESSAGE) { + break + } + Start-Sleep -Seconds 10 + $n += 1 + } while ($n -le $MAX_RETRY_ATTEMPTS) + $n | Should -BeLessOrEqual $MAX_RETRY_ATTEMPTS + + $mockedUpdateData = Get-ExtensionConfigurationSettings $extensionName $mockUpdateKey + $mockedUpdateData | Should -Not -BeNullOrEmpty + } + It "Deletes the extension from the cluster with inference enabled" { # cleanup the relay and servicebus $relayResourceID = Get-ExtensionConfigurationSettings $extensionName $relayResourceIDKey From c83ad7d470f43f66b6ae42739c58bce586561ebc Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 12 May 2022 11:28:04 +0800 Subject: [PATCH 12/18] update tests --- testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index 4aa764b3460..df661c62bab 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -58,7 +58,7 @@ Describe 'AzureML Kubernetes Testing' { } It "Perform Update extension" { - az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true $($mockUpdateKey)=true --config-protected $($mockProtectedUpdateKey)=true --no-wait + az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true $mockUpdateKey=true --config-protected $mockProtectedUpdateKey=true --no-wait $? 
| Should -BeTrue # Loop and retry until the extension updated From f8536f4eceaa4d58536ed570ab21ce16b6be3a90 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 12 May 2022 13:17:14 +0800 Subject: [PATCH 13/18] fix update --- testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index df661c62bab..f44cc9037d2 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -58,7 +58,7 @@ Describe 'AzureML Kubernetes Testing' { } It "Perform Update extension" { - az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true $mockUpdateKey=true --config-protected $mockProtectedUpdateKey=true --no-wait + az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true "$($mockUpdateKey)=true" --config-protected "$($mockProtectedUpdateKey)=true" --no-wait $? 
| Should -BeTrue # Loop and retry until the extension updated From fc041a07726efba16ba5d1c585c0f8ff4a9c811b Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 12 May 2022 13:36:06 +0800 Subject: [PATCH 14/18] wait before update --- .../extensions/public/AzureMLKubernetes.Tests.ps1 | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index f44cc9037d2..39e1cb46435 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -57,6 +57,20 @@ Describe 'AzureML Kubernetes Testing' { $extensionExists | Should -Not -BeNullOrEmpty } + It "Wait for the extension to be ready" { + # Loop and retry until the extension installed + $n = 0 + do + { + if (Get-ExtensionStatus $extensionName -eq $SUCCESS_MESSAGE) { + break + } + Start-Sleep -Seconds 10 + $n += 1 + } while ($n -le $MAX_RETRY_ATTEMPTS) + $n | Should -BeLessOrEqual $MAX_RETRY_ATTEMPTS + } + It "Perform Update extension" { az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config enableInference=false enableTraining=true "$($mockUpdateKey)=true" --config-protected "$($mockProtectedUpdateKey)=true" --no-wait $? 
| Should -BeTrue From b8f6b3fb1489f25807988122ddc5e82bccd748f2 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Thu, 12 May 2022 14:13:58 +0800 Subject: [PATCH 15/18] fix test cases --- .../extensions/public/AzureMLKubernetes.Tests.ps1 | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index 39e1cb46435..b288fbfefb4 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -62,7 +62,12 @@ Describe 'AzureML Kubernetes Testing' { $n = 0 do { - if (Get-ExtensionStatus $extensionName -eq $SUCCESS_MESSAGE) { + + $output = az $Env:K8sExtensionName show -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName + $? | Should -BeTrue + + $provisioningState = ($output | ConvertFrom-Json).provisioningState + if ($provisioningState -eq "Succeeded") { break } Start-Sleep -Seconds 10 @@ -79,7 +84,12 @@ Describe 'AzureML Kubernetes Testing' { $n = 0 do { - if (Get-ExtensionStatus $extensionName -eq $SUCCESS_MESSAGE) { + + $output = az $Env:K8sExtensionName show -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName + $? 
| Should -BeTrue + + $provisioningState = ($output | ConvertFrom-Json).provisioningState + if ($provisioningState -eq "Succeeded") { break } Start-Sleep -Seconds 10 From 78ed7af890773e681b0f28b2ba445decacd8759a Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Fri, 13 May 2022 08:35:01 +0800 Subject: [PATCH 16/18] increase check interval, change release train to stable --- .../test/extensions/public/AzureMLKubernetes.Tests.ps1 | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index b288fbfefb4..70d5f5432dd 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -15,7 +15,7 @@ Describe 'AzureML Kubernetes Testing' { It 'Creates the extension and checks that it onboards correctly with inference and SSL enabled' { $sslKeyPemFile = Join-Path (Join-Path (Join-Path (Split-Path $PSScriptRoot -Parent) "data") "azure_ml") "test_key.pem" $sslCertPemFile = Join-Path (Join-Path (Join-Path (Split-Path $PSScriptRoot -Parent) "data") "azure_ml") "test_cert.pem" - az $Env:K8sExtensionName create -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters --extension-type $extensionType -n $extensionName --release-train staging --config enableInference=true identity.proxy.remoteEnabled=True identity.proxy.remoteHost=https://master.experiments.azureml-test.net inferenceRouterServiceType=nodePort sslCname=test.domain --config-protected sslKeyPemFile=$sslKeyPemFile sslCertPemFile=$sslCertPemFile --no-wait + az $Env:K8sExtensionName create -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters --extension-type $extensionType -n $extensionName --release-train stable --config enableInference=true identity.proxy.remoteEnabled=True 
identity.proxy.remoteHost=https://master.experiments.azureml-test.net inferenceRouterServiceType=nodePort sslCname=test.domain --config-protected sslKeyPemFile=$sslKeyPemFile sslCertPemFile=$sslCertPemFile --no-wait $? | Should -BeTrue $output = az $Env:K8sExtensionName show -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName @@ -67,10 +67,11 @@ Describe 'AzureML Kubernetes Testing' { $? | Should -BeTrue $provisioningState = ($output | ConvertFrom-Json).provisioningState + Write-Host "Provisioning state: $provisioningState" if ($provisioningState -eq "Succeeded") { break } - Start-Sleep -Seconds 10 + Start-Sleep -Seconds 20 $n += 1 } while ($n -le $MAX_RETRY_ATTEMPTS) $n | Should -BeLessOrEqual $MAX_RETRY_ATTEMPTS @@ -89,10 +90,11 @@ Describe 'AzureML Kubernetes Testing' { $? | Should -BeTrue $provisioningState = ($output | ConvertFrom-Json).provisioningState + Write-Host "Provisioning state: $provisioningState" if ($provisioningState -eq "Succeeded") { break } - Start-Sleep -Seconds 10 + Start-Sleep -Seconds 20 $n += 1 } while ($n -le $MAX_RETRY_ATTEMPTS) $n | Should -BeLessOrEqual $MAX_RETRY_ATTEMPTS From 189c15aed1bf1b4e511310b375ade2e3bc5caab5 Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Fri, 13 May 2022 09:04:05 +0800 Subject: [PATCH 17/18] update test, do not disable inference --- testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 index 70d5f5432dd..404e50dcd78 100644 --- a/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 +++ b/testing/test/extensions/public/AzureMLKubernetes.Tests.ps1 @@ -78,7 +78,7 @@ Describe 'AzureML Kubernetes Testing' { } It "Perform Update extension" { - az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type 
connectedClusters -n $extensionName --config enableInference=false enableTraining=true "$($mockUpdateKey)=true" --config-protected "$($mockProtectedUpdateKey)=true" --no-wait + az $Env:K8sExtensionName update -c $($ENVCONFIG.arcClusterName) -g $($ENVCONFIG.resourceGroup) --cluster-type connectedClusters -n $extensionName --config "$($mockUpdateKey)=true" --config-protected "$($mockProtectedUpdateKey)=true" --no-wait $? | Should -BeTrue # Loop and retry until the extension updated From 85a15d1e88940501b0d53337855b22d2a63a2dbe Mon Sep 17 00:00:00 2001 From: Yue Yu Date: Fri, 13 May 2022 14:52:11 +0800 Subject: [PATCH 18/18] do not populate clusterPurpose if inferenceRouterHA is not set. --- .../partner_extensions/AzureMLKubernetes.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py index 34eb7d5a893..97fab3fd8aa 100644 --- a/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py +++ b/src/k8s-extension/azext_k8s_extension/partner_extensions/AzureMLKubernetes.py @@ -369,11 +369,12 @@ def __normalize_config(self, configuration_settings, configuration_protected_set # inference inferenceRouterHA = _get_value_from_config_protected_config( self.inferenceRouterHA, configuration_settings, configuration_protected_settings) - isTestCluster = True if inferenceRouterHA is not None and str(inferenceRouterHA).lower() == 'false' else False - if isTestCluster: - configuration_settings['clusterPurpose'] = 'DevTest' - else: - configuration_settings['clusterPurpose'] = 'FastProd' + if inferenceRouterHA is not None: + isTestCluster = str(inferenceRouterHA).lower() == 'false' + if isTestCluster: + configuration_settings['clusterPurpose'] = 'DevTest' + else: + configuration_settings['clusterPurpose'] = 'FastProd' inferenceRouterServiceType = _get_value_from_config_protected_config( 
self.inferenceRouterServiceType, configuration_settings, configuration_protected_settings)