diff --git a/deploy_apps/tks-lma-federation-wftpl.yaml b/deploy_apps/tks-lma-federation-wftpl.yaml index cd0c3094..0546f3d4 100644 --- a/deploy_apps/tks-lma-federation-wftpl.yaml +++ b/deploy_apps/tks-lma-federation-wftpl.yaml @@ -311,179 +311,6 @@ spec: echo "$thanos_sc_ep:$THANOS_SC_PORT" > /mnt/out/thanos_sc_ep.txt fi - - name: collectThanosScEndpoints - inputs: - parameters: - - name: tks_info_host - - name: app_group_id - outputs: - parameters: - - name: outwards_cluster_list - valueFrom: - path: /mnt/out/cluster_list.txt - - name: inwards_endpoint_map - valueFrom: - path: /mnt/out/inwards_endpoint.txt - - name: cur_cluster_name - valueFrom: - path: /mnt/out/cur_cluster_name.txt - volumes: - - name: out - emptyDir: {} - script: - name: 'collect' - image: harbor.taco-cat.xyz/tks/centos-tks-api:v1.0 - command: ["python"] - envFrom: - - secretRef: - name: "git-svc-token" - - secretRef: - name: "tks-api-secret" - volumeMounts: - - name: out - mountPath: /mnt/out - source: | - import sys - import os - import git - import requests - import json - - TKS_API_URL = "{{workflow.parameters.tks_info_host}}" - CLUSTER_ID = "{{workflow.parameters.cluster_id}}" - #CLUSTER_ID = "caldcde6u" - #CLUSTER_ID = "c6fk1w3dm" - - def getToken() : - data = { - 'organizationId' : os.environ['ORGANIZATION_ID'], - 'accountId': os.environ['ACCOUNT_ID'], - 'password' : os.environ['PASSWORD'] - } - - res = requests.post(TKS_API_URL+"/api/1.0/auth/login", json = data ) - if res.status_code != 200 : - return '' - resJson = res.json() - return resJson['user']['token'] - - output_cluster_list = [] - temp_map = {} - inwards_endpoint_list = [] - inwards_endpoint_map = {} - outwards_endpoint_map = {} - - TOKEN=getToken() - - res = requests.get(TKS_API_URL+"/api/1.0/clusters/" + CLUSTER_ID, - headers={"Authorization": "Bearer " + TOKEN} ) - if res.status_code != 200 : - sys.exit('Failed to get cluster') - - cluster = res.json()['cluster'] - print( cluster ) - organizationId = 
cluster['organizationId'] - cur_cluster_name = cluster['id'] - - res = requests.get(TKS_API_URL+"/api/1.0/clusters?organizationId=" + organizationId, - headers={"Authorization": "Bearer " + TOKEN} ) - if res.status_code != 200 : - sys.exit('Failed to get clusters') - - clusters = res.json()['clusters'] - - print("Iterating over clusters in the same contract...") - - # Iterate over cluster list except current cluster # - for cluster in clusters: - if cluster['status'] != "RUNNING": - continue - - if cluster['id'] != CLUSTER_ID : - print("*******************************************") - print("Checking cluster: {}".format(cluster['id'])) - - gitBaseUrl = os.environ['GIT_SVC_URL'].replace("http://","") - print( gitBaseUrl ) - - - print("Checking if corresponding cluster repo exists..") - #url = "@github.com/{{workflow.parameters.github_account}}/{}".format(cluster['id']) - url = "@" + gitBaseUrl + "/" + os.environ['USERNAME'] + "/" + cluster['id'] - print( url ) - - repoUrl = "http://" + os.environ['TOKEN'] + url - try: - repo = git.Repo.clone_from(repoUrl, './tempcluster') - - except git.exc.GitCommandError as e: - print(e) - print("Repo {} doesn't exist. Skipping this cluster..".format(repoUrl)) - continue - - res = requests.get(TKS_API_URL+"/api/1.0/app-groups?clusterId=" + cluster['id'], - headers={"Authorization": "Bearer " + TOKEN} ) - if res.status_code != 200 : - print( 'Failed to get appgroups for cluster ') - continue - - appGroups = res.json()['appGroups'] - print( appGroups ) - - os.system("rm -rf ./tempcluster") - - # Check if LMA group exists. 
- for appGroup in appGroups: - if appGroup['appGroupType'] == "LMA" : - print("Found LMA appGroup: {}".format(appGroup['name'])) - - res = requests.get(TKS_API_URL+"/api/1.0/app-groups/" + appGroup['id'] + "/applications?applicationType=PROMETHEUS", - headers={"Authorization": "Bearer " + TOKEN} ) - if res.status_code != 200 : - print( 'Failed to get applications for appgroup') - continue - - applications = res.json()['applications'] - if applications : - # This is based on the premise that there's only one prometheus per appGroup. - endpoint = applications[0]['endpoint'] - print("Get Thanos-sc endpoint: {}. Appending it to inward list.".format(endpoint)) - - # Add this cluster's endpoint to endpoint map - inwards_endpoint_list.append(endpoint) - - # Add this cluster to outward list so that current ep is updated to this cluster - temp_map["name"] = cluster['id'] - str_json = json.dumps(temp_map) - output_cluster_list.append(str_json) - - - # Compose profer format to be used as input on next step - inwards_endpoint_map['querier.stores'] = inwards_endpoint_list - - ########################### - # Construct output params # - ########################### - len_list = len(output_cluster_list) - - with open("/mnt/out/cluster_list.txt", "w") as f: - f.write('[') - - print("*** Outwards Cluster List ***") - for idx, item in enumerate(output_cluster_list, start=1): - print("item {}: {}".format(idx, item)) - f.write(item.strip("'")) - if idx < len_list: - f.write(',') - f.write(']') - - with open("/mnt/out/inwards_endpoint.txt", "w") as f: - str_inwards_endpoint = repr(inwards_endpoint_map) - f.write(str_inwards_endpoint) - - with open("/mnt/out/cur_cluster_name.txt", "w") as f: - f.write(cur_cluster_name) - - name: create-keycloak-client activeDeadlineSeconds: 600 inputs: diff --git a/deploy_apps/tks-primary-cluster.yaml b/deploy_apps/tks-primary-cluster.yaml index 9a87fab1..97d1dc0d 100644 --- a/deploy_apps/tks-primary-cluster.yaml +++ b/deploy_apps/tks-primary-cluster.yaml 
@@ -147,12 +147,6 @@ spec:
           ]
         when: "{{workflow.parameters.object_store}} == s3"
 
-    # TODO: 전체 완성을 위해서는 아래내역을 구현하여 동적인 bucket을 만드는 방식으로 구현해야 하지만
-    # 5월 오픈전 가능한 형상을 위해 협의한 바(아래)에 따라 본부분은 기존 준비됀 것을 사용하는 것으로 구현하고 추후 수정하다.
-    # 1. 사용자가 생성하는 첫번째 클러스터는 primary cluster
-    # 2. primary cluster는 계약이 종료되기 전까지 임의 삭제불가
-    # 3. 개별 클러스터에서 수행되는 모니터링은 없고 계약단위에서 수행되어야 함
-    # 하지만 이부분에 datasource 바꿔주는 부분을 포함하고 있으므로 일단 한번 타야할듯...
     - - name: change-target
         template: change-logging-target
         arguments:
@@ -216,6 +210,28 @@ spec:
       - name: primary_cluster
       - name: member_clusters
     steps:
+
+    - - name: change-thanos-sidecar
+        template: sub-change-thanos-sidecar
+        arguments:
+          parameters:
+          - name: primary_cluster
+            value: '{{inputs.parameters.primary_cluster}}'
+          - name: member_clusters
+            value: '{{inputs.parameters.member_clusters}}'
+
+    - - name: render-current-cluster
+        templateRef:
+          name: event-gitea-render-manifests
+          template: main
+        arguments:
+          parameters:
+          - name: decapod_site_repo
+            value: "{{ workflow.parameters.github_account }}/{{ workflow.parameters.cluster_id }}"
+          - name: base_repo_branch
+            value: "{{ workflow.parameters.base_repo_branch }}"
+        when: "{{steps.change-thanos-sidecar.outputs.parameters.changed}} != 'NO_CHANGE_HERE'" # rendering is unnecessary for a cluster where the change was already applied (i.e. this step has run before)
+
     - - name: sync-organization-changes
         template: sub-sync-organization-changes
         arguments:
@@ -232,10 +248,10 @@ spec:
         arguments:
           parameters:
           - name: decapod_site_repo
-            value: "{{ workflow.parameters.github_account }}/{{steps.sync-organization-changes.outputs.parameters.primary_cluster}}"
+            value: "{{ workflow.parameters.github_account }}/{{steps.sync-organization-changes.outputs.parameters.changed}}"
           - name: base_repo_branch
             value: "{{ workflow.parameters.base_repo_branch }}"
-        when: "{{steps.sync-organization-changes.outputs.parameters.primary_cluster}} != 'NO_CHANGE_HERE'"
+        when: "{{steps.sync-organization-changes.outputs.parameters.changed}} != 'NO_CHANGE_HERE'"
   #######################
   # Template Definition #
   #######################
@@
-258,22 +274,9 @@ spec:
     retryStrategy:
       limit: 2
 
-  # - name: sub-prepare-bucket
-  #   inputs:
-  #     parameters:
-  #     - name: primary_cluster
-  #   container:
-  #     name: prepare-bucket
-  #     image: harbor.taco-cat.xyz/tks/hyperkube:v1.18.6
-  #     command:
-  #       - /bin/bash
-  #       - '-c'
-  #       - |
-  #         echo "prepare bucket for the '{{workflow.parameters.organization_id}}' (clusters: '{{inputs.parameters.primary_cluster}}')"
-  #   activeDeadlineSeconds: 900
-  #   retryStrategy:
-  #     limit: 2
-
+  # function sub-pre-change-logging-target
+  # 1. Change endpoint of fluentbit-output (all in org.)
+  # 2. Change endpoint of thanos-sidecar in prometheus-pod (all in org.)
   - name: sub-pre-change-logging-target
     inputs:
       parameters:
@@ -546,6 +549,104 @@ spec:
           path: /mnt/out/modified_cluster_list.txt
     activeDeadlineSeconds: 900
 
+  - name: sub-change-thanos-sidecar
+    inputs:
+      parameters:
+      - name: primary_cluster
+      - name: member_clusters
+    container:
+      name: logging-target-changer
+      image: harbor.taco-cat.xyz/tks/shyaml_jq_yq_kubectl_python:3.11
+      command:
+      - /bin/bash
+      - '-c'
+      - |
+        #!/bin/bash
+        # Point the current cluster's thanos-config objectStorage at the primary cluster's S3 bucket and push the change.
+        set -ex
+
+        function log() {
+          level=$1
+          msg=$2
+          date=$(date '+%F %H:%M:%S')
+          echo "[$date] $level $msg"
+        }
+
+        current_cluster={{workflow.parameters.cluster_id}}
+        primary_cluster={{inputs.parameters.primary_cluster}}
+        member_clusters="{{inputs.parameters.member_clusters}}"
+        empty_char=
+
+        if [ -z "${primary_cluster}" ] || [ "${primary_cluster}" = "$empty_char" ]; then
+          primary_cluster=${current_cluster}
+        fi
+
+        S3_Service="s3://ap-northeast-2"
+        cp /kube/value kubeconfig_adm
+        export KUBECONFIG=kubeconfig_adm
+
+        #################
+        # updates: build the token-authenticated repo URL, edit site-values, commit and push
+        #################
+        GIT_ACCOUNT={{workflow.parameters.github_account}}
+        if [[ $GIT_SVC_URL == https://* ]]; then
+          repository_base=https://${TOKEN//[$'\t\r\n ']}@${GIT_SVC_URL/https:\/\//}/${GIT_ACCOUNT}/
+        else
+          repository_base=http://${TOKEN//[$'\t\r\n ']}@${GIT_SVC_URL/http:\/\//}/${GIT_ACCOUNT}/
+        fi
+
+        log "INFO" "##### change the loki target to $LOKI_HOST:$LOKI_PORT and $S3_Service (the current target is ${current_cluster})"
+        [ -d ${current_cluster} ] || git clone ${repository_base}${current_cluster}
+        cd ${current_cluster}
+        # Drop any existing objectStorage override of the thanos-config chart, then set each S3 field again.
+        yq -i e "del(.charts[] | select(.name == \"thanos-config\").override.objectStorage)" ${current_cluster}/lma/site-values.yaml
+        yq -i e ".charts |= map(select(.name == \"thanos-config\").override.objectStorage.type=\"s3\")" ${current_cluster}/lma/site-values.yaml
+        yq -i e ".charts |= map(select(.name == \"thanos-config\").override.objectStorage.rawConfig.endpoint=\"s3.ap-northeast-2.amazonaws.com\")" ${current_cluster}/lma/site-values.yaml
+        yq -i e ".charts |= map(select(.name == \"thanos-config\").override.objectStorage.rawConfig.region=\"ap-northeast-2\")" ${current_cluster}/lma/site-values.yaml
+        yq -i e ".charts |= map(select(.name == \"thanos-config\").override.objectStorage.rawConfig.bucket=\"${primary_cluster}-tks-thanos\")" ${current_cluster}/lma/site-values.yaml
+        yq -i e ".charts |= map(select(.name == \"thanos-config\").override.objectStorage.rawConfig.signature_version2=false)" ${current_cluster}/lma/site-values.yaml
+
+        git config --global user.name "tks"
+        git config --global user.email "tks@sktelecom.com"
+
+        if [[ `git status --porcelain` ]]; then
+          log "INFO" "##### commit changes on ${current_cluster} to use s3"
+          cmessage="changes on ${current_cluster} to use s3"
+          git add ${current_cluster}/lma/site-values.yaml
+          git commit -m "change loki and thanos endpoints. (by set-primary workflow)" -m "$cmessage"
+          git push
+          # Publish the cluster id as plain text (not JSON) so the caller's `when` guard can compare it with NO_CHANGE_HERE.
+          echo "${current_cluster}" > /mnt/out/modified_cluster_list.txt
+        else
+          log "INFO" "No change on the cluster ${current_cluster}"
+          echo NO_CHANGE_HERE > /mnt/out/modified_cluster_list.txt
+        fi
+        cd -
+        rm -rf ${current_cluster}
+
+        # The output file is written exactly once, in the branches above; rewriting it here would clobber NO_CHANGE_HERE.
+
+      env:
+      - name: OBJECT_SOTRE  # (sic) same spelling as the variable read by the sub-sync-organization-changes script
+        value: "{{workflow.parameters.object_store}}"
+      envFrom:
+      - secretRef:
+          name: "git-svc-token"
+      volumeMounts:
+      - name: kubeconfig-adm  # NOTE(review): not declared in this template's volumes; presumably defined at the workflow spec level - confirm
+        mountPath: "/kube"
+      - name: out
+        mountPath: /mnt/out
+    volumes:
+    - name: out
+      emptyDir: {}
+    outputs:
+      parameters:
+      - name: changed
+        valueFrom:
+          path: /mnt/out/modified_cluster_list.txt
+    activeDeadlineSeconds: 900
+
   - name: sub-sync-organization-changes
     inputs:
       parameters:
@@ -658,10 +759,10 @@ spec:
           git add ${primary_cluster}/lma/site-values.yaml
           git commit -m "change thanos-query stores. (by set-primary workflow)" -m "$cmessage"
           git push
-          echo ${primary_cluster} > /mnt/out/primary_cluster.txt
+          echo ${primary_cluster} > /mnt/out/changed.txt
         else
           log "INFO" "No change on the cluster ${member}"
-          echo NO_CHANGE_HERE > /mnt/out/primary_cluster.txt
+          echo NO_CHANGE_HERE > /mnt/out/changed.txt
         fi
 
         if [ "$OBJECT_SOTRE" != "s3" ]; then
@@ -673,7 +774,7 @@ spec:
             git add ${primary_cluster}/lma/site-values.yaml
             git commit -m "change iamRoles(s3). (by set-primary workflow)" -m "$cmessage"
             git push
-            echo ${primary_cluster} > /mnt/out/primary_cluster.txt
+            echo ${primary_cluster} > /mnt/out/changed.txt
           else
             log "INFO" "(iamRoles) No change on the cluster ${member}"
           fi
@@ -702,9 +803,9 @@ spec:
       emptyDir: {}
     outputs:
       parameters:
-      - name: primary_cluster
+      - name: changed
         valueFrom:
-          path: /mnt/out/primary_cluster.txt
+          path: /mnt/out/changed.txt
     activeDeadlineSeconds: 900