Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[k8s-extension] Add Managed Identity Auth support for ContainerInsights Extension #118

Merged
merged 13 commits into from
Apr 14, 2022
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,16 @@
import datetime
import json

from ..utils import get_cluster_rp_api_version

from knack.log import get_logger

from azure.cli.core.azclierror import InvalidArgumentValueError
from azure.cli.core.azclierror import AzCLIError, CLIError, InvalidArgumentValueError, ClientRequestError
from azure.cli.core.commands import LongRunningOperation
from azure.cli.core.commands.client_factory import get_mgmt_service_client, get_subscription_id
from azure.cli.core.util import sdk_no_wait
from azure.cli.core.util import sdk_no_wait, send_raw_request
from msrestazure.tools import parse_resource_id, is_valid_resource_id
from azure.core.exceptions import HttpResponseError

from ..vendored_sdks.models import Extension
from ..vendored_sdks.models import ScopeCluster
Expand All @@ -33,7 +36,6 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
scope, auto_upgrade_minor_version, release_train, version, target_namespace,
release_namespace, configuration_settings, configuration_protected_settings,
configuration_settings_file, configuration_protected_settings_file):

"""ExtensionType 'microsoft.azuremonitor.containers' specific validations & defaults for Create
Must create and return a valid 'Extension' object.

Expand Down Expand Up @@ -70,6 +72,48 @@ def Create(self, cmd, client, resource_group_name, cluster_name, name, cluster_t
)
return extension, name, create_identity

def Delete(self, cmd, client, resource_group_name, cluster_name, name, cluster_type, yes):
# Delete DCR-A if it exists incase of MSI Auth
useAADAuth = False
isDCRAExists = False
cluster_rp, _ = get_cluster_rp_api_version(cluster_type)
try:
extension = client.get(resource_group_name, cluster_rp, cluster_type, cluster_name, name)
except Exception:
pass # its OK to ignore the exception since MSI auth in preview

subscription_id = get_subscription_id(cmd.cli_ctx)
# handle cluster type here
cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/{2}/{3}/{4}'.format(subscription_id, resource_group_name, cluster_rp, cluster_type, cluster_name)
if (extension is not None) and (extension.configuration_settings is not None):
configSettings = extension.configuration_settings
if 'omsagent.useAADAuth' in configSettings:
useAADAuthSetting = configSettings['omsagent.useAADAuth']
if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting):
useAADAuth = True
if useAADAuth:
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version=2019-11-01-preview"
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "GET", association_url,)
isDCRAExists = True
break
except HttpResponseError as ex:
# Customize the error message for resources not found
if ex.response.status_code == 404:
isDCRAExists = False
except Exception:
pass # its OK to ignore the exception since MSI auth in preview

if isDCRAExists:
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version=2019-11-01-preview"
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "DELETE", association_url,)
break
except Exception:
pass # its OK to ignore the exception since MSI auth in preview


# Custom Validation Logic for Container Insights

Expand Down Expand Up @@ -159,7 +203,8 @@ def _ensure_default_log_analytics_workspace_for_monitoring(cmd, subscription_id,
"westeurope": "westeurope",
"westindia": "centralindia",
"westus": "westus",
"westus2": "westus2"
"westus2": "westus2",
"eastus2euap": "eastus2euap"
}

# mapping for azure china cloud
Expand Down Expand Up @@ -376,6 +421,7 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_n

subscription_id = get_subscription_id(cmd.cli_ctx)
workspace_resource_id = ''
useAADAuth = False

if configuration_settings is not None:
if 'loganalyticsworkspaceresourceid' in configuration_settings:
Expand All @@ -385,6 +431,12 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_n
if 'logAnalyticsWorkspaceResourceID' in configuration_settings:
workspace_resource_id = configuration_settings['logAnalyticsWorkspaceResourceID']

if 'omsagent.useAADAuth' in configuration_settings:
useAADAuthSetting = configuration_settings['omsagent.useAADAuth']
logger.info("provided useAADAuth flag is : %s", useAADAuthSetting)
if (isinstance(useAADAuthSetting, str) and str(useAADAuthSetting).lower() == "true") or (isinstance(useAADAuthSetting, bool) and useAADAuthSetting):
useAADAuth = True

workspace_resource_id = workspace_resource_id.strip()

if configuration_protected_settings is not None:
Expand All @@ -409,7 +461,11 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_n
raise InvalidArgumentValueError('{} is not a valid Azure resource ID.'.format(workspace_resource_id))

if is_ci_extension_type:
_ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result()
if useAADAuth:
logger.info("MSI onboarding since omsagent.useAADAuth set to true")
_ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_name, workspace_resource_id)
else:
_ensure_container_insights_for_monitoring(cmd, workspace_resource_id).result()

# extract subscription ID and resource group from workspace_resource_id URL
parsed = parse_resource_id(workspace_resource_id)
Expand Down Expand Up @@ -440,3 +496,188 @@ def _get_container_insights_settings(cmd, cluster_resource_group_name, cluster_n
configuration_settings['omsagent.domain'] = 'opinsights.azure.eaglex.ic.gov'
elif cloud_name.lower() == 'ussec':
configuration_settings['omsagent.domain'] = 'opinsights.azure.microsoft.scloud'


def get_existing_container_insights_extension_dcr_tags(cmd, dcr_url):
tags = {}
_MAX_RETRY_TIMES = 3
for retry_count in range(0, _MAX_RETRY_TIMES):
try:
resp = send_raw_request(
cmd.cli_ctx, "GET", dcr_url
)
json_response = json.loads(resp.text)
if json_response["tags"] is not None:
tags = json_response["tags"]
break
except CLIError as e:
if "ResourceNotFound" in str(e):
break
if retry_count >= (_MAX_RETRY_TIMES - 1):
raise e
return tags


def _ensure_container_insights_dcr_for_monitoring(cmd, subscription_id, cluster_resource_group_name, cluster_name, workspace_resource_id):
from azure.core.exceptions import HttpResponseError

cluster_region = ''
resources = cf_resources(cmd.cli_ctx, subscription_id)
cluster_resource_id = '/subscriptions/{0}/resourceGroups/{1}/providers/Microsoft.Kubernetes' \
'/connectedClusters/{2}'.format(subscription_id, cluster_resource_group_name, cluster_name)
try:
resource = resources.get_by_id(cluster_resource_id, '2020-01-01-preview')
cluster_region = resource.location.lower()
except HttpResponseError as ex:
raise ex

# extract subscription ID and resource group from workspace_resource_id URL
parsed = parse_resource_id(workspace_resource_id)
workspace_subscription_id, workspace_resource_group = parsed["subscription"], parsed["resource_group"]
workspace_region = ''
resources = cf_resources(cmd.cli_ctx, workspace_subscription_id)
try:
resource = resources.get_by_id(workspace_resource_id, '2015-11-01-preview')
workspace_region = resource.location
except HttpResponseError as ex:
raise ex

dataCollectionRuleName = f"MSCI-{cluster_name}-{cluster_region}"
dcr_resource_id = f"/subscriptions/{workspace_subscription_id}/resourceGroups/{workspace_resource_group}/providers/Microsoft.Insights/dataCollectionRules/{dataCollectionRuleName}"

# first get the association between region display names and region IDs (because for some reason
# the "which RPs are available in which regions" check returns region display names)
region_names_to_id = {}
# retry the request up to two times
for _ in range(3):
try:
location_list_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"/subscriptions/{subscription_id}/locations?api-version=2019-11-01"
r = send_raw_request(cmd.cli_ctx, "GET", location_list_url)
# this is required to fool the static analyzer. The else statement will only run if an exception
# is thrown, but flake8 will complain that e is undefined if we don't also define it here.
error = None
break
except AzCLIError as e:
error = e
else:
# This will run if the above for loop was not broken out of. This means all three requests failed
raise error
json_response = json.loads(r.text)
for region_data in json_response["value"]:
region_names_to_id[region_data["displayName"]] = region_data["name"]

# check if region supports DCR and DCR-A
for _ in range(3):
try:
feature_check_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"/subscriptions/{subscription_id}/providers/Microsoft.Insights?api-version=2020-10-01"
r = send_raw_request(cmd.cli_ctx, "GET", feature_check_url)
error = None
break
except AzCLIError as e:
error = e
else:
raise error

json_response = json.loads(r.text)
for resource in json_response["resourceTypes"]:
if (resource["resourceType"].lower() == "datacollectionrules"):
region_ids = map(lambda x: region_names_to_id[x], resource["locations"]) # dcr supported regions
if (workspace_region not in region_ids):
raise ClientRequestError(f"Data Collection Rules are not supported for LA workspace region {workspace_region}")
if (resource["resourceType"].lower() == "datacollectionruleassociations"):
region_ids = map(lambda x: region_names_to_id[x], resource["locations"]) # dcr-a supported regions
if (cluster_region not in region_ids):
raise ClientRequestError(f"Data Collection Rule Associations are not supported for cluster region {cluster_region}")

dcr_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{dcr_resource_id}?api-version=2019-11-01-preview"
# get existing tags on the container insights extension DCR if the customer added any
existing_tags = get_existing_container_insights_extension_dcr_tags(cmd, dcr_url)

# create the DCR
dcr_creation_body = json.dumps(
{
"location": workspace_region,
"tags": existing_tags,
"properties": {
"dataSources": {
"extensions": [
{
"name": "ContainerInsightsExtension",
"streams": [
"Microsoft-Perf",
"Microsoft-ContainerInventory",
"Microsoft-ContainerLog",
"Microsoft-ContainerLogV2",
"Microsoft-ContainerNodeInventory",
"Microsoft-KubeEvents",
"Microsoft-KubeMonAgentEvents",
"Microsoft-KubeNodeInventory",
"Microsoft-KubePodInventory",
"Microsoft-KubePVInventory",
"Microsoft-KubeServices",
"Microsoft-InsightsMetrics",
],
"extensionName": "ContainerInsights",
}
]
},
"dataFlows": [
{
"streams": [
"Microsoft-Perf",
"Microsoft-ContainerInventory",
"Microsoft-ContainerLog",
"Microsoft-ContainerLogV2",
"Microsoft-ContainerNodeInventory",
"Microsoft-KubeEvents",
"Microsoft-KubeMonAgentEvents",
"Microsoft-KubeNodeInventory",
"Microsoft-KubePodInventory",
"Microsoft-KubePVInventory",
"Microsoft-KubeServices",
"Microsoft-InsightsMetrics",
],
"destinations": ["la-workspace"],
}
],
"destinations": {
"logAnalytics": [
{
"workspaceResourceId": workspace_resource_id,
"name": "la-workspace",
}
]
},
},
}
)

for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "PUT", dcr_url, body=dcr_creation_body)
error = None
break
except AzCLIError as e:
error = e
else:
raise error

association_body = json.dumps(
{
"location": cluster_region,
"properties": {
"dataCollectionRuleId": dcr_resource_id,
"description": "routes monitoring data to a Log Analytics workspace",
},
}
)
association_url = cmd.cli_ctx.cloud.endpoints.resource_manager + f"{cluster_resource_id}/providers/Microsoft.Insights/dataCollectionRuleAssociations/ContainerInsightsExtension?api-version=2019-11-01-preview"
for _ in range(3):
try:
send_raw_request(cmd.cli_ctx, "PUT", association_url, body=association_body,)
error = None
break
except AzCLIError as e:
error = e
else:
raise error