From 5c09a24ba61d4dc8175b33ad77ccf250f8d2d564 Mon Sep 17 00:00:00 2001 From: Robert Choi Date: Wed, 6 Apr 2022 16:26:44 +0900 Subject: [PATCH] handle all corner cases when updating thanos-sc endpoints --- .../update-decapod-manifest-wftpl.yaml | 19 ++++--- deploy_apps/tks-lma-federation-wftpl.yaml | 49 ++++++++++++++----- .../tks-remove-lma-federation-wftpl.yaml | 41 ++++++++++++++-- 3 files changed, 85 insertions(+), 24 deletions(-) diff --git a/decapod-manifests/update-decapod-manifest-wftpl.yaml b/decapod-manifests/update-decapod-manifest-wftpl.yaml index c4834ff2..ce123f45 100644 --- a/decapod-manifests/update-decapod-manifest-wftpl.yaml +++ b/decapod-manifests/update-decapod-manifest-wftpl.yaml @@ -63,13 +63,18 @@ spec: print("Cloning repository...") repoUrl = "https://" + os.environ['TOKEN'] + "@{{inputs.parameters.site_repo_url}}" - repo = git.Repo.clone_from(repoUrl, siteDir) - with repo.config_writer() as git_config: - git_config.set_value('user', 'email', 'tks-argo@tks.com') - git_config.set_value('user', 'name', 'TKS Argo') - print("Cloned repo contents:") - os.system("ls -al {}".format(sitePath)) - os.system("ls -al {}/{}".format(sitePath, clusterName)) + try: + repo = git.Repo.clone_from(repoUrl, siteDir) + with repo.config_writer() as git_config: + git_config.set_value('user', 'email', 'tks-argo@tks.com') + git_config.set_value('user', 'name', 'TKS Argo') + print("Cloned repo contents:") + os.system("ls -al {}".format(sitePath)) + os.system("ls -al {}/{}".format(sitePath, clusterName)) + except git.exc.GitCommandError as e: + print("Repository {} doesn't exist. Skipping..".format(repoUrl)) + print(e) + sys.exit(0) else: print("The repo already exists. Pulling latest updates..") diff --git a/deploy_apps/tks-lma-federation-wftpl.yaml b/deploy_apps/tks-lma-federation-wftpl.yaml index 7aa789e6..fb3b36db 100644 --- a/deploy_apps/tks-lma-federation-wftpl.yaml +++ b/deploy_apps/tks-lma-federation-wftpl.yaml @@ -47,7 +47,7 @@ spec: name: lma-federation template: deploy - - - name: updateTksInfo + - - name: updateEndpointToTksInfo templateRef: name: update-tks-app-group-info template: updateTksAppGroup @@ -55,7 +55,7 @@ spec: parameters: # TODO: Can this be pre-determined? Or composed dynamically on deployment? - name: endpoints - value: "{'PROMETHEUS': 'thanos-sidecar.cluster_xy'}" + value: "{'PROMETHEUS': '{{workflow.parameters.cluster_id}}-thanos-ep'}" # LMA appGroup specific task # - - name: collectThanosScEndpoints @@ -71,7 +71,7 @@ spec: value: "{{ workflow.parameters.app_group_id }}" # Again, how can this be determined? - name: cur_endpoint - value: "thanos-sidecar.cluster_xy" + value: "{{workflow.parameters.cluster_id}}-thanos-ep" - - name: updateDecapodManifestOutwards templateRef: @@ -184,6 +184,9 @@ spec: image: sktdev/python-centos-wf-worker:v1.0 command: - python + envFrom: + - secretRef: + name: "github-tks-mgmt-token" env: - name: PYTHONPATH value: "/opt/protobuf/:/opt/rh/rh-python38/root/lib/python3.8/site-packages/:/opt/app-root/lib/python3.8/site-packages/" @@ -195,6 +198,7 @@ spec: mountPath: /mnt/out source: | import sys + import os import google.protobuf import grpc import info_pb2 @@ -202,6 +206,7 @@ spec: import common_pb2 import common_pb2_grpc import json + import git output_cluster_list = [] temp_map = {} @@ -238,24 +243,44 @@ spec: continue if cluster.id != "{{inputs.parameters.cluster_id}}": - print("Found cluster: {}".format(cluster.id)) - temp_map["name"] = cluster.id - str_json = json.dumps(temp_map) - output_cluster_list.append(str_json) + print("*******************************************") + print("Checking cluster: {}".format(cluster.id)) + + print("Checking if corresponding cluster repo exists..") + url = "@github.com/{{workflow.parameters.github_account}}/{}".format(cluster.id) + repoUrl = "https://" + os.environ['TOKEN'] + url + try: + repo = git.Repo.clone_from(repoUrl, './tempcluster') + + except git.exc.GitCommandError as e: + print(e) + print("Repo {} doesn't exist. Skipping this cluster..".format(repoUrl)) + continue res = app_stub.GetAppGroupsByClusterID(common_pb2.IDRequest(id=cluster.id)) print("Response from GetAppGroupsByClusterID:") print(res.app_groups) + os.system("rm -rf ./tempcluster") + + # Check if LMA group exists. for app_group in res.app_groups: if app_group.type == common_pb2.LMA: - res = app_stub.GetApps(info_pb2.GetAppsRequest(app_group_id="{{inputs.parameters.app_group_id}}", type=common_pb2.PROMETHEUS)) + print("Found LMA app_group: {}".format(app_group.app_group_name)) + res = app_stub.GetApps(info_pb2.GetAppsRequest(app_group_id=app_group.app_group_id, type=common_pb2.PROMETHEUS)) + + if res.apps: + # This is based on the premise that there's only one prometheus per appGroup. + endpoint = res.apps[0].endpoint + print("Get Thanos-sc endpoint: {}. Appending it to inward list.".format(endpoint)) - # This is based on the premise that there's only one prometheus per appGroup. - endpoint = res.apps[0].endpoint + # Add this cluster's endpoint to endpoint map + inwards_endpoint_list.append(endpoint) - # Add this cluster's endpoint to endpoint map - inwards_endpoint_list.append(endpoint) + # Add this cluster to outward list so that current ep is updated to this cluster + temp_map["name"] = cluster.id + str_json = json.dumps(temp_map) + output_cluster_list.append(str_json) # Update current endpoint to other cluster's site-yaml # outwards_endpoint_map['querier.stores'] = "{{inputs.parameters.cur_endpoint}}" diff --git a/deploy_apps/tks-remove-lma-federation-wftpl.yaml b/deploy_apps/tks-remove-lma-federation-wftpl.yaml index 566974ff..37e228da 100644 --- a/deploy_apps/tks-remove-lma-federation-wftpl.yaml +++ b/deploy_apps/tks-remove-lma-federation-wftpl.yaml @@ -62,7 +62,7 @@ spec: value: "{{ workflow.parameters.app_group_id }}" # Again, how can this be determined? - name: cur_endpoint - value: "thanos-sidecar.cluster_xy" + value: "{{workflow.parameters.cluster_id}}-thanos-ep" # Delete endpoint from other cluster's manifests - - name: updateDecapodManifestOutwards @@ -152,6 +152,9 @@ spec: image: sktdev/python-centos-wf-worker:v1.0 command: - python + envFrom: + - secretRef: + name: "github-tks-mgmt-token" env: - name: PYTHONPATH value: "/opt/protobuf/:/opt/rh/rh-python38/root/lib/python3.8/site-packages/:/opt/app-root/lib/python3.8/site-packages/" @@ -163,6 +166,7 @@ spec: mountPath: /mnt/out source: | import sys + import os import google.protobuf import grpc import info_pb2 @@ -170,6 +174,7 @@ spec: import common_pb2 import common_pb2_grpc import json + import git output_cluster_list = [] temp_map = {} @@ -200,10 +205,36 @@ spec: # Iterate over cluster list except current cluster # for cluster in res.clusters: if cluster.id != "{{inputs.parameters.cluster_id}}": - print("Found cluster: {}".format(cluster.id)) - temp_map["name"] = cluster.id - str_json = json.dumps(temp_map) - output_cluster_list.append(str_json) + print("*******************************************") + print("Checking cluster: {}".format(cluster.id)) + + print("Checking if corresponding cluster repo exists..") + url = "@github.com/{{workflow.parameters.github_account}}/{}".format(cluster.id) + repoUrl = "https://" + os.environ['TOKEN'] + url + try: + repo = git.Repo.clone_from(repoUrl, './tempcluster') + + except git.exc.GitCommandError as e: + print(e) + print("Repo {} doesn't exist. Skipping this cluster..".format(repoUrl)) + continue + + res = app_stub.GetAppGroupsByClusterID(common_pb2.IDRequest(id=cluster.id)) + print("Response from GetAppGroupsByClusterID:") + print(res.app_groups) + + os.system("rm -rf ./tempcluster") + + # Check if LMA group exists. + for app_group in res.app_groups: + if app_group.type == common_pb2.LMA: + print("Found LMA app_group: {}".format(app_group.app_group_name)) + res = app_stub.GetApps(info_pb2.GetAppsRequest(app_group_id=app_group.app_group_id, type=common_pb2.PROMETHEUS)) + + # Add this cluster to outward list so that current ep is deleted from this cluster + temp_map["name"] = cluster.id + str_json = json.dumps(temp_map) + output_cluster_list.append(str_json) # Update current endpoint to other cluster's site-yaml # outwards_endpoint_map['querier.stores'] = "{{inputs.parameters.cur_endpoint}}"