From c56b13f65afb2f90803edea1cc8393b95f46e28b Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 17 Nov 2022 15:51:57 +0530 Subject: [PATCH 01/31] Added cloud storage services AWS and GCP --- .../templates/job-cluster-jobmanager.yaml | 16 +++++++++++- .../templates/job-cluster-taskmanager.yaml | 13 ++++++++++ .../datapipeline/flink-jobs/values.j2 | 8 ++++++ .../templates/cluster-config.json.j2 | 25 ++++++++++++++++++- .../templates/flink_job_deployment.yaml | 10 ++++++++ .../helm_charts/datapipeline_jobs/values.j2 | 3 +++ 6 files changed, 73 insertions(+), 2 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 1e7c5e2778..91534a85d5 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -20,7 +20,8 @@ spec: imagePullPolicy: Always workingDir: /opt/flink command: ["/opt/flink/bin/standalone-job.sh"] -{{- $job-config-key := .Release.Name }} +#{{- $job-config-key := .Release.Name }} + args: ["start-foreground", "--job-classname={{ index .Values $job-config-key.job_classname }}", "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", @@ -29,7 +30,20 @@ spec: "-Dblob.server.port=6124", "-Dqueryable-state.server.ports=6125", "-Djobmanager.heap.size={{ index .Values $job-config-key.job_manager_heap_size }}", +{{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end 
}} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" +{{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: - containerPort: 6123 diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index cad7c3f47c..348bdd66e4 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -29,7 +29,20 @@ spec: "-Djobmanager.rpc.address={{ .Release.Name }}-jobmanager", "-Dtaskmanager.rpc.port=6122", "-Dtaskmanager.heap.size={{ index .Values $job-config-key.task_manager_heap_size }}", +{{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "s3" }} + "-Ds3.access-key={{ .Values.s3_access_key }}", + "-Ds3.secret-key={{ .Values.s3_secret_key }}", + "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}", +{{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: - containerPort: 6122 diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index fd34c8c647..7d3bb58f8a 100644 --- 
a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -8,6 +8,14 @@ image_tag: {{ image_tag }} azure_storage_account={{ sunbird_private_storage_account_name }} azure_storage_secret={{ sunbird_private_storage_account_key }} +s3_access_key: {{ s3_storage_key }} +s3_secret_key: {{ s3_storage_secret }} +s3_endpoint: {{ s3_storage_endpoint }} +s3_path_style_access: {{ s3_path_style_access }} +gcloud_client_key: {{ gcloud_client_key }} +gcloud_private_secret: {{ gcloud_private_secret }} +gcloud_project_id: {{ gcloud_project_id }} + telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index 1a26514684..a41ccd6307 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -1,12 +1,35 @@ { "jars": [ + {% if checkpoint_store_type == "azure" %} "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "s3://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", + "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact 
}}", + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% endif %} ], - "file": "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + "file": + {% if checkpoint_store_type == "azure" %} + "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% endif %} "files": [ + {% if checkpoint_store_type == "azure" %} "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + {% elif checkpoint_store_type == "s3" %} + "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", + {% elif checkpoint_store_type == "gcloud" %} + "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", + {% endif %} ], "className": "org.ekstep.analytics.job.JobExecutor", "executorCores": {{ spark_cluster.executor_core }}, diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index 10e6b62181..a103a13788 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -118,6 +118,11 @@ spec: "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + 
"-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -199,6 +204,11 @@ spec: "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", +{{- end }} +{{- if eq .Values.checkpoint_store_type "gcloud" }} + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 22630c3015..228dd7a151 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -10,6 +10,9 @@ s3_access_key: {{ s3_storage_key }} s3_secret_key: {{ s3_storage_secret }} s3_endpoint: {{ s3_storage_endpoint }} s3_path_style_access: {{ s3_path_style_access }} +gcloud_client_key: {{ gcloud_client_key }} +gcloud_private_secret: {{ gcloud_private_secret }} +gcloud_project_id: {{ gcloud_project_id }} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} From 43e5743bf8fa0ac3ef8c70a4452f90dad68d473b Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 17 Nov 2022 15:58:11 +0530 Subject: [PATCH 02/31] Added cloud storage services AWS and GCP --- .../flink-jobs/templates/job-cluster-jobmanager.yaml | 2 +- .../datapipeline_jobs/templates/flink_job_deployment.yaml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml 
b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 91534a85d5..c2d4918111 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -20,7 +20,7 @@ spec: imagePullPolicy: Always workingDir: /opt/flink command: ["/opt/flink/bin/standalone-job.sh"] -#{{- $job-config-key := .Release.Name }} +{{- $job-config-key := .Release.Name }} args: ["start-foreground", "--job-classname={{ index .Values $job-config-key.job_classname }}", diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index a103a13788..ab76362787 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -206,9 +206,9 @@ spec: "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", + "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", From 09bb4d736378a6dccb1395e466b2e33ed5bd671c Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Tue, 29 Nov 2022 03:36:41 +0530 Subject: [PATCH 03/31] updated common vars --- .../roles/data-products-deploy/templates/common.conf.j2 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git 
a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index bde88ec9d4..f3847c65c5 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -15,10 +15,10 @@ storage.secret.config="{{ dp_storage_secret_config }}" reports.storage.key.config="{{ dp_reports_storage_key_config }}" reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} -cloud_storage_type="azure" +cloud_service_provider="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} -cloud_storage_type="s3" -cloud_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_service_provider="s3" +cloud_public_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" @@ -305,4 +305,4 @@ uci.fushionauth.postgres.pass="{{ uci_postgres.fushionauth_db_psss }}" uci.exhaust.store.prefix="" uci.encryption.secret="{{ uci_encryption_secret_key }}" -// END OF UCI Related Job Configs \ No newline at end of file +// END OF UCI Related Job Configs From 0c9b76394f36ab10b3c92ec05fcb71e273d08c6f Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Thu, 15 Dec 2022 17:41:38 +0530 Subject: [PATCH 04/31] common variable changes in data-pipeline --- .../templates/job-cluster-jobmanager.yaml | 14 ++++---- .../datapipeline/flink-jobs/values.j2 | 14 +++----- .../templates/cluster-config.json.j2 | 32 +++++++++---------- .../templates/flink_job_deployment.yaml | 28 ++++++++-------- .../helm_charts/datapipeline_jobs/values.j2 | 14 +++----- 5 files changed, 47 insertions(+), 55 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml 
b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index c2d4918111..618ab35706 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -31,18 +31,18 @@ spec: "-Dqueryable-state.server.ports=6125", "-Djobmanager.heap.size={{ index .Values $job-config-key.job_manager_heap_size }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 7d3bb58f8a..6c06f450f8 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ 
b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -6,16 +6,12 @@ dockerhub: {{ dockerhub }} repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} -azure_storage_account={{ sunbird_private_storage_account_name }} -azure_storage_secret={{ sunbird_private_storage_account_key }} -s3_access_key: {{ s3_storage_key }} -s3_secret_key: {{ s3_storage_secret }} -s3_endpoint: {{ s3_storage_endpoint }} +checkpoint_store_type: {{ cloud_storage_type }} //Need to check +cloud_storage_key: {{ cloud_public_storage_accountname }} +cloud_storage_secret: {{ cloud_public_storage_secret }} +cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} s3_path_style_access: {{ s3_path_style_access }} -gcloud_client_key: {{ gcloud_client_key }} -gcloud_private_secret: {{ gcloud_private_secret }} -gcloud_project_id: {{ gcloud_project_id }} - +cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index a41ccd6307..bb6347e35a 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -1,33 +1,33 @@ { "jars": [ - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket 
}}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" {% endif %} ], "file": - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", {% endif %} "files": [ - {% if checkpoint_store_type == "azure" %} - "wasbs://{{ bucket }}@{{sunbird_private_storage_account_name}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - {% elif 
checkpoint_store_type == "s3" %} + {% if cloud_storage_type == "azure" %} + "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/application.conf" + {% elif cloud_storage_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", - {% elif checkpoint_store_type == "gcloud" %} + {% elif cloud_storage_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", {% endif %} ], @@ -44,7 +44,7 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml 
-Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ sunbird_private_storage_account_name }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{sunbird_private_storage_account_name}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics 
-Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" } } diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index ab76362787..fb3cb0dee8 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -111,18 +111,18 @@ spec: args: ["start-foreground", "--job-classname={{ .Values.job_classname }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net={{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_storage_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key 
}}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -197,18 +197,18 @@ spec: command: ["/opt/flink/bin/taskmanager.sh"] args: ["start-foreground", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_account }}.blob.core.windows.net={{ .Values.azure_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net={{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "s3" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_storage_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}" + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 228dd7a151..414f5f6b5d 100644 --- 
a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -3,16 +3,12 @@ imagepullsecrets: {{ imagepullsecrets }} dockerhub: {{ dockerhub }} repository: {{flink_repository|default('sunbird-datapipeline')}} image_tag: {{ image_tag }} -checkpoint_store_type: {{ checkpoint_store_type }} -azure_account: {{ azure_account }} -azure_secret: {{ azure_secret }} -s3_access_key: {{ s3_storage_key }} -s3_secret_key: {{ s3_storage_secret }} -s3_endpoint: {{ s3_storage_endpoint }} +checkpoint_store_type: {{ cloud_storage_type }} +cloud_storage_key: {{ cloud_public_storage_accountname }} +cloud_storage_secret: {{ cloud_public_storage_secret }} +cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} s3_path_style_access: {{ s3_path_style_access }} -gcloud_client_key: {{ gcloud_client_key }} -gcloud_private_secret: {{ gcloud_private_secret }} -gcloud_project_id: {{ gcloud_project_id }} +cloud_storage_project_id: {{ cloud_public_storage_project }} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} From 9e3ddd4a5c4e2cbc89f85bba83c12d4da29ae726 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 16 Dec 2022 14:52:09 +0530 Subject: [PATCH 05/31] common variable changes in data-pipeline --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 414f5f6b5d..7e52ae02e4 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -3,7 +3,7 @@ imagepullsecrets: {{ imagepullsecrets }} dockerhub: {{ dockerhub }} repository: {{flink_repository|default('sunbird-datapipeline')}} image_tag: {{ image_tag }} -checkpoint_store_type: {{ cloud_storage_type }} +checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ 
cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} From 645db596a76dfdf28a0cdfadfb6acfd96db91b04 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 15:38:16 +0530 Subject: [PATCH 06/31] updated secor csp variables --- ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml index b8cd06675e..07c02ac1c6 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml @@ -2,8 +2,8 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -azure_container_name: "{{secor_azure_container_name}}" -azure_account_key: "{{sunbird_private_storage_account_key}}" +cloud_storage_telemetry_bucketname: "{{secor_azure_container_name}}" +cloud_private_storage_secret: "{{sunbird_private_storage_account_key}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" From 0c5f8ce14b8edd4573110ec7f2e8bbab81803d87 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 15:42:11 +0530 Subject: [PATCH 07/31] updated analytics spark csp variables --- .../roles/analytics-spark-provision/templates/spark-env.j2 | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index dea6e5ad06..53bf3c3888 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -72,8 +72,8 @@ export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" export reports_storage_key={{sunbird_private_storage_account_name}} export 
reports_storage_secret={{sunbird_private_storage_account_key}} -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} +export cloud_private_storage_accountname={{sunbird_private_storage_account_name}} +export cloud_private_storage_secret={{sunbird_private_storage_account_key}} export druid_storage_account_key={{sunbird_public_storage_account_name}} export druid_storage_account_secret={{sunbird_public_storage_account_key}} export aws_storage_key={{ s3_storage_key }} From b9658df438cf8a9926d8af456ec3b489a95ebbd1 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 19 Dec 2022 16:01:45 +0530 Subject: [PATCH 08/31] common variable changes in datapipeline/flink-jobs --- .../flink-jobs/templates/job-cluster-jobmanager.yaml | 2 +- .../flink-jobs/templates/job-cluster-taskmanager.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 618ab35706..6b25ab6c53 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -33,7 +33,7 @@ spec: {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} +{{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.cloud_storage_key }}", "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml 
b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index 348bdd66e4..ecf59608db 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -32,7 +32,7 @@ spec: {{- if eq .Values.checkpoint_store_type "azure" }} "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", {{- end }} -{{- if eq .Values.checkpoint_store_type "s3" }} +{{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.s3_access_key }}", "-Ds3.secret-key={{ .Values.s3_secret_key }}", "-Ds3.endpoint={{ .Values.s3_endpoint }}", From 72bae4d12469e403f75fc5fac0a7450b8c8ae51e Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 21 Dec 2022 11:28:12 +0530 Subject: [PATCH 09/31] common variable changes in datapipeline/flink-jobs --- .../datapipeline/flink-jobs/values.j2 | 4 +- .../roles/analytics-druid/defaults/main.yml | 44 +++++++++---------- .../templates/cluster-config.json.j2 | 18 ++++---- .../templates/common.conf.j2 | 4 +- 4 files changed, 35 insertions(+), 35 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 6c06f450f8..f03a3f2459 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -6,11 +6,11 @@ dockerhub: {{ dockerhub }} repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} -checkpoint_store_type: {{ cloud_storage_type }} //Need to check +checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} 
-s3_path_style_access: {{ s3_path_style_access }} +s3_path_style_access: {{ cloud_storage_path_style_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 8a55bc3f97..a22a3b9c0e 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -31,7 +31,7 @@ druid_request_logging_type: "file" #Druid Extensions -druid_storage_type: "azure" +druid_storage_type: {{ cloud_storage_telemetry_type }} druid_extensions_list : '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", "druid-kafka-indexing-service", "druid-datasketches"' @@ -40,7 +40,7 @@ druid_community_extensions: # End of druid_extensions -druid_indexing_logs_type: azure +druid_indexing_logs_type: {{ cloud_storage_telemetry_type }} druid_indexing_log_dir: /var/druid/indexing-logs druid_indexing_storage_type : metadata druid_indexing_task_basedir : "/var/task" @@ -126,23 +126,23 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_public_storage_accountname }}" + azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure - druid_log_azure_container: "{{ druid_azure_container_name }}" + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" druid_log_azure_folder: "druidlogs" #Druid S3 Details - druid_storage_type: "{{ druid_storage_type }}" - s3_access_key: "{{ s3_storage_key }}" - s3_secret_key: "{{ s3_storage_secret }}" - 
s3_bucket: "{{ s3_storage_container }}" - s3_endpoint: "{{ s3_storage_endpoint }}" + druid_storage_type: "{{ cloud_storage_telemetry_type }}" + s3_access_key: "{{ cloud_public_storage_accountname }}" + s3_secret_key: "{{ cloud_public_storage_secret }}" + s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" + s3_endpoint: "{{ cloud_public_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ s3_path_style_access }}" - s3_v4_sign_region: "{{ s3_default_bucket_location }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 - s3_logging_bucket: "{{ s3_storage_container }}" + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m @@ -200,23 +200,23 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_public_storage_accountname }}" + azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure - druid_log_azure_container: "{{ druid_azure_container_name }}" + druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" druid_log_azure_folder: "druidlogs" #Druid S3 Details - druid_storage_type: "{{ druid_storage_type }}" + druid_storage_type: "{{ cloud_storage_telemetry_type }}" s3_access_key: "{{ s3_storage_key }}" s3_secret_key: "{{ s3_storage_secret }}" s3_bucket: "{{ s3_storage_container }}" s3_endpoint: "{{ s3_storage_endpoint }}" s3_segment_dir: 
"druid/rollup/segments" - s3_path_like_access: "{{ s3_path_style_access }}" - s3_v4_sign_region: "{{ s3_default_bucket_location }}" + s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 - s3_logging_bucket: "{{ s3_storage_container }}" + s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index bb6347e35a..f0d4e57ae6 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -1,33 +1,33 @@ { "jars": [ - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_core_artifact }}", "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ scruid_artifact }}", "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_core_artifact }}", "gs://{{ bucket }}/models-{{ model_version }}/{{ scruid_artifact }}", "gs://{{ 
bucket }}/models-{{ model_version }}/{{ analytics_ed_dataporducts_jar_artifact }}" {% endif %} ], "file": - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}", {% endif %} "files": [ - {% if cloud_storage_type == "azure" %} + {% if cloud_storage_telemetry_type == "azure" %} "wasbs://{{ bucket }}@{{cloud_private_storage_accountname}}.blob.core.windows.net/models-{{ model_version }}/application.conf" - {% elif cloud_storage_type == "s3" %} + {% elif cloud_storage_telemetry_type == "s3" %} "s3://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", - {% elif cloud_storage_type == "gcloud" %} + {% elif cloud_storage_telemetry_type == "gcloud" %} "gs://{{ bucket }}/models-{{ model_version }}/{{ analytics_batch_module_artifact }}/application.conf", {% endif %} ], diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 b/ansible/roles/data-products-deploy/templates/common.conf.j2 index f3847c65c5..24159515e3 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -15,9 +15,9 @@ storage.secret.config="{{ dp_storage_secret_config }}" reports.storage.key.config="{{ dp_reports_storage_key_config }}" reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" {% if dp_object_store_type == "azure" %} -cloud_service_provider="azure" +cloud_storage_telemetry_type="azure" {% elif 
(dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} -cloud_service_provider="s3" +cloud_storage_telemetry_type="s3" cloud_public_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" aws_storage_key="{{ s3_storage_key }}" From 2003a182928ee2a3743c115260cc1e91808de3a1 Mon Sep 17 00:00:00 2001 From: santhosh-tg <93243580+santhosh-tg@users.noreply.github.com> Date: Wed, 21 Dec 2022 15:46:08 +0530 Subject: [PATCH 10/31] Release 5.1.0 - CSP changes (#1591) * Update influxdb role * Add cloud agnostic vars Add AWS role * generate sas token ondemand * Fix destination path * Update sas_token expiry to one hour --- ansible/artifacts-download.yml | 40 +++++++++++++++--- ansible/artifacts-upload.yml | 41 ++++++++++++++++--- .../roles/aws-cloud-storage/defaults/main.yml | 3 ++ .../aws-cloud-storage/tasks/delete-folder.yml | 9 ++++ .../roles/aws-cloud-storage/tasks/delete.yml | 9 ++++ .../aws-cloud-storage/tasks/download.yml | 9 ++++ .../roles/aws-cloud-storage/tasks/main.yml | 18 ++++++++ .../aws-cloud-storage/tasks/upload-folder.yml | 9 ++++ .../roles/aws-cloud-storage/tasks/upload.yml | 9 ++++ .../tasks/delete-using-azcopy.yml | 12 +++++- .../tasks/upload-using-azcopy.yml | 14 ++++++- .../roles/cassandra-backup/defaults/main.yml | 9 +--- ansible/roles/cassandra-backup/tasks/main.yml | 24 ++++++++--- .../roles/cassandra-restore/defaults/main.yml | 8 +--- .../roles/cassandra-restore/tasks/main.yml | 28 +++++++++---- .../roles/es-azure-snapshot/tasks/main.yml | 4 +- .../roles/gcp-cloud-storage/defaults/main.yml | 6 +-- .../gcp-cloud-storage/tasks/download.yml | 4 +- .../gcp-cloud-storage/tasks/upload-batch.yml | 2 +- .../roles/gcp-cloud-storage/tasks/upload.yml | 2 +- .../roles/influxdb_backup/defaults/main.yml | 9 +--- ansible/roles/influxdb_backup/tasks/main.yml | 28 ++++++++++++- .../roles/influxdb_restore/defaults/main.yml | 10 +---- 
ansible/roles/influxdb_restore/tasks/main.yml | 29 +++++++++++-- .../defaults/main.yml | 9 +--- .../tasks/main.yml | 26 ++++++++---- .../defaults/main.yml | 10 +---- .../postgres-managed-service/tasks/main.yml | 24 ++++++++--- .../roles/postgresql-backup/defaults/main.yml | 13 +----- .../roles/postgresql-backup/tasks/main.yml | 24 ++++++++--- .../postgresql-restore/defaults/main.yml | 9 +--- .../roles/postgresql-restore/tasks/main.yml | 29 ++++++------- ansible/roles/redis-backup/defaults/main.yml | 9 +--- ansible/roles/redis-backup/tasks/main.yml | 24 ++++++++--- .../defaults/main.yml | 9 +--- .../redis-multiprocess-backup/tasks/main.yml | 24 ++++++++--- .../defaults/main.yml | 9 +--- .../redis-multiprocess-restore/tasks/main.yml | 25 ++++++++--- ansible/roles/redis-restore/defaults/main.yml | 9 +--- ansible/roles/redis-restore/tasks/main.yml | 23 ++++++++--- 40 files changed, 426 insertions(+), 186 deletions(-) create mode 100644 ansible/roles/aws-cloud-storage/defaults/main.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/delete-folder.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/delete.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/download.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/main.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/upload-folder.yml create mode 100644 ansible/roles/aws-cloud-storage/tasks/upload.yml diff --git a/ansible/artifacts-download.yml b/ansible/artifacts-download.yml index e75c4a941a..9db0efb42f 100644 --- a/ansible/artifacts-download.yml +++ b/ansible/artifacts-download.yml @@ -3,8 +3,38 @@ become: yes vars_files: - "{{inventory_dir}}/secrets.yml" - environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_artifact_storage_account_name}}" - AZURE_STORAGE_SAS_TOKEN: "{{sunbird_artifact_storage_account_sas}}" - roles: - - artifacts-download-azure + tasks: + - name: download artifact from azure storage + include_role: + name: azure-cloud-storage + tasks_from: 
blob-download.yml + vars: + blob_container_name: "{{ cloud_storage_artifacts_bucketname }}" + blob_file_name: "{{ artifact }}" + local_file_or_folder_path: "{{ artifact_path }}" + storage_account_name: "{{ cloud_artifact_storage_accountname }}" + storage_account_key: "{{ cloud_artifact_storage_secret }}" + when: cloud_service_provider == "azure" + + - name: download artifact from gcloud storage + include_role: + name: gcp-cloud-storage + tasks_from: download.yml + vars: + gcp_bucket_name: "{{ cloud_storage_artifacts_bucketname }}" + gcp_path: "{{ artifact }}" + local_file_or_folder_path: "{{ artifact_path }}" + when: cloud_service_provider == "gcloud" + + - name: download artifact from aws s3 + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + local_file_or_folder_path: "{{ artifact_path }}" + s3_bucket_name: "{{ cloud_storage_artifacts_bucketname }}" + s3_path: "{{ artifact }}" + aws_default_region: "{{ cloud_public_storage_region }}" + aws_access_key_id: "{{ cloud_artifact_storage_accountname }}" + aws_secret_access_key: "{{ cloud_artifact_storage_secret }}" + when: cloud_service_provider == "aws" diff --git a/ansible/artifacts-upload.yml b/ansible/artifacts-upload.yml index 41ef0387ef..3bdbe73017 100644 --- a/ansible/artifacts-upload.yml +++ b/ansible/artifacts-upload.yml @@ -3,8 +3,39 @@ become: yes vars_files: - "{{inventory_dir}}/secrets.yml" - environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_artifact_storage_account_name}}" - AZURE_STORAGE_SAS_TOKEN: "{{sunbird_artifact_storage_account_sas}}" - roles: - - artifacts-upload-azure + tasks: + - name: upload artifact to azure storage + include_role: + name: azure-cloud-storage + tasks_from: blob-upload.yml + vars: + blob_container_name: "{{ cloud_storage_artifacts_bucketname }}" + container_public_access: "off" + blob_file_name: "{{ artifact }}" + local_file_or_folder_path: "{{ artifact_path }}" + storage_account_name: "{{ cloud_artifact_storage_accountname }}" + storage_account_key: 
"{{ cloud_artifact_storage_secret }}" + when: cloud_service_provider == "azure" + + - name: upload artifact to gcloud storage + include_role: + name: gcp-cloud-storage + tasks_from: upload.yml + vars: + gcp_bucket_name: "{{ cloud_storage_artifacts_bucketname }}" + gcp_path: "{{ artifact }}" + local_file_or_folder_path: "{{ artifact_path }}" + when: cloud_service_provider == "gcloud" + + - name: upload artifact to aws s3 + include_role: + name: aws-cloud-storage + tasks_from: upload.yml + vars: + local_file_or_folder_path: "{{ artifact_path }}" + s3_bucket_name: "{{ cloud_storage_artifacts_bucketname }}" + s3_path: "{{ artifact }}" + aws_default_region: "{{ cloud_public_storage_region }}" + aws_access_key_id: "{{ cloud_artifact_storage_accountname }}" + aws_secret_access_key: "{{ cloud_artifact_storage_secret }}" + when: cloud_service_provider == "aws" diff --git a/ansible/roles/aws-cloud-storage/defaults/main.yml b/ansible/roles/aws-cloud-storage/defaults/main.yml new file mode 100644 index 0000000000..6f3f6f86d6 --- /dev/null +++ b/ansible/roles/aws-cloud-storage/defaults/main.yml @@ -0,0 +1,3 @@ +s3_bucket_name: "" +s3_path: "" +local_file_or_folder_path: "" diff --git a/ansible/roles/aws-cloud-storage/tasks/delete-folder.yml b/ansible/roles/aws-cloud-storage/tasks/delete-folder.yml new file mode 100644 index 0000000000..c912b14edb --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/delete-folder.yml @@ -0,0 +1,9 @@ +--- +- name: delete files and folders recursively + environment: + AWS_DEFAULT_REGION: "{{ aws_default_region }}" + AWS_ACCESS_KEY_ID: "{{ aws_access_key_id }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_access_key }}" + shell: "aws s3 rm s3://{{ s3_bucket_name }}/{{ s3_path }} --recursive" + async: 3600 + poll: 10 diff --git a/ansible/roles/aws-cloud-storage/tasks/delete.yml b/ansible/roles/aws-cloud-storage/tasks/delete.yml new file mode 100644 index 0000000000..414ea52e6b --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/delete.yml @@ 
-0,0 +1,9 @@ +--- +- name: delete files from s3 + environment: + AWS_DEFAULT_REGION: "{{ aws_default_region }}" + AWS_ACCESS_KEY_ID: "{{ aws_access_key_id }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_access_key }}" + shell: "aws s3 rm s3://{{ s3_bucket_name }}/{{ s3_path }}" + async: 3600 + poll: 10 diff --git a/ansible/roles/aws-cloud-storage/tasks/download.yml b/ansible/roles/aws-cloud-storage/tasks/download.yml new file mode 100644 index 0000000000..138024af78 --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/download.yml @@ -0,0 +1,9 @@ +--- +- name: download files to s3 + environment: + AWS_DEFAULT_REGION: "{{ aws_default_region }}" + AWS_ACCESS_KEY_ID: "{{ aws_access_key_id }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_access_key }}" + shell: "aws s3 cp s3://{{ s3_bucket_name }}/{{ s3_path }} {{ local_file_or_folder_path }}" + async: 3600 + poll: 10 diff --git a/ansible/roles/aws-cloud-storage/tasks/main.yml b/ansible/roles/aws-cloud-storage/tasks/main.yml new file mode 100644 index 0000000000..62f204a9d2 --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/main.yml @@ -0,0 +1,18 @@ +--- +- name: delete files from aws S3 bucket + include: delete.yml + +- name: delete folders from aws S3 bucket recursively + include: delete-folder.yml + + +- name: download file from S3 + include: download.yml + +- name: upload files from a local to aws S3 + include: upload.yml + +- name: upload files and folder from local directory to aws S3 + include: upload-folder.yml + + diff --git a/ansible/roles/aws-cloud-storage/tasks/upload-folder.yml b/ansible/roles/aws-cloud-storage/tasks/upload-folder.yml new file mode 100644 index 0000000000..3e03b068b7 --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/upload-folder.yml @@ -0,0 +1,9 @@ +--- +- name: upload folder to s3 + environment: + AWS_DEFAULT_REGION: "{{ aws_default_region }}" + AWS_ACCESS_KEY_ID: "{{ aws_access_key_id }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_access_key }}" + shell: "aws s3 cp {{ 
local_file_or_folder_path }} s3://{{ s3_bucket_name }}/{{ s3_path }} --recursive" + async: 3600 + poll: 10 diff --git a/ansible/roles/aws-cloud-storage/tasks/upload.yml b/ansible/roles/aws-cloud-storage/tasks/upload.yml new file mode 100644 index 0000000000..af8de990e2 --- /dev/null +++ b/ansible/roles/aws-cloud-storage/tasks/upload.yml @@ -0,0 +1,9 @@ +--- +- name: upload files to s3 + environment: + AWS_DEFAULT_REGION: "{{ aws_default_region }}" + AWS_ACCESS_KEY_ID: "{{ aws_access_key_id }}" + AWS_SECRET_ACCESS_KEY: "{{ aws_secret_access_key }}" + shell: "aws s3 cp {{ local_file_or_folder_path }} s3://{{ s3_bucket_name }}/{{ s3_path }}" + async: 3600 + poll: 10 diff --git a/ansible/roles/azure-cloud-storage/tasks/delete-using-azcopy.yml b/ansible/roles/azure-cloud-storage/tasks/delete-using-azcopy.yml index 236169e86c..196de9c9b3 100644 --- a/ansible/roles/azure-cloud-storage/tasks/delete-using-azcopy.yml +++ b/ansible/roles/azure-cloud-storage/tasks/delete-using-azcopy.yml @@ -1,6 +1,16 @@ --- +- name: generate SAS token for azcopy + shell: | + sas_expiry=`date -u -d "1 hour" '+%Y-%m-%dT%H:%MZ'` + sas_token=?`az storage container generate-sas -n {{ blob_container_name }} --account-name {{ storage_account_name }} --account-key {{ storage_account_key }} --https-only --permissions dlrw --expiry $sas_expiry -o tsv` + echo $sas_token + register: sas_token + +- set_fact: + container_sas_token: "{{ sas_token.stdout}}" + - name: delete files and folders from azure storage using azcopy - shell: "azcopy rm 'https://{{ storage_account_name }}.blob.core.windows.net/{{ blob_container_name }}{{ blob_container_folder_path }}{{ storage_account_sas_token }}' --recursive" + shell: "azcopy rm 'https://{{ storage_account_name }}.blob.core.windows.net/{{ blob_container_name }}{{ blob_container_folder_path }}{{ container_sas_token }}' --recursive" environment: AZCOPY_CONCURRENT_FILES: "10" async: 10800 diff --git a/ansible/roles/azure-cloud-storage/tasks/upload-using-azcopy.yml 
b/ansible/roles/azure-cloud-storage/tasks/upload-using-azcopy.yml index 99ab3c2bf8..d86b233ead 100644 --- a/ansible/roles/azure-cloud-storage/tasks/upload-using-azcopy.yml +++ b/ansible/roles/azure-cloud-storage/tasks/upload-using-azcopy.yml @@ -1,12 +1,22 @@ --- +- name: generate SAS token for azcopy + shell: | + sas_expiry=`date -u -d "1 hour" '+%Y-%m-%dT%H:%MZ'` + sas_token=?`az storage container generate-sas -n {{ blob_container_name }} --account-name {{ storage_account_name }} --account-key {{ storage_account_key }} --https-only --permissions dlrw --expiry $sas_expiry -o tsv` + echo $sas_token + register: sas_token + +- set_fact: + container_sas_token: "{{ sas_token.stdout}}" + - name: create container in azure storage if it doesn't exist include_role: name: azure-cloud-storage tasks_from: container-create.yml - name: upload files and folders to azure storage using azcopy - shell: "azcopy copy {{ local_file_or_folder_path }} 'https://{{ storage_account_name }}.blob.core.windows.net/{{ blob_container_name }}{{ blob_container_folder_path }}{{ storage_account_sas_token }}' --recursive" + shell: "azcopy copy {{ local_file_or_folder_path }} 'https://{{ storage_account_name }}.blob.core.windows.net/{{ blob_container_name }}{{ blob_container_folder_path }}{{ container_sas_token }}' --recursive" environment: AZCOPY_CONCURRENT_FILES: "10" async: 10800 - poll: 10 \ No newline at end of file + poll: 10 diff --git a/ansible/roles/cassandra-backup/defaults/main.yml b/ansible/roles/cassandra-backup/defaults/main.yml index e87a6a43c9..e47f87ef05 100644 --- a/ansible/roles/cassandra-backup/defaults/main.yml +++ b/ansible/roles/cassandra-backup/defaults/main.yml @@ -1,10 +1,5 @@ cassandra_root_dir: /etc/cassandra cassandra_backup_dir: /data/cassandra/backup -cassandra_backup_azure_container_name: dp-cassandra-backup -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. 
In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. After few releases, we will remove the older variables and use only the new variables across the repos -cassandra_backup_storage: "{{ cassandra_backup_azure_container_name }}" +cloud_storage_dpcassandrabackup_bucketname: "{{cloud_storage_management_bucketname}}" +cloud_storage_dpcassandrabackup_foldername: dp-cassandra-backup diff --git a/ansible/roles/cassandra-backup/tasks/main.yml b/ansible/roles/cassandra-backup/tasks/main.yml index bc219b237e..f3cf8e916e 100755 --- a/ansible/roles/cassandra-backup/tasks/main.yml +++ b/ansible/roles/cassandra-backup/tasks/main.yml @@ -30,22 +30,34 @@ name: azure-cloud-storage tasks_from: upload-using-azcopy.yml vars: - blob_container_name: "{{ cassandra_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpcassandrabackup_foldername }}" container_public_access: "off" blob_container_folder_path: "" local_file_or_folder_path: "{{ cassandra_backup_gzip_file_path }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_sas_token: "{{ azure_management_storage_account_sas }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: upload backup to S3 + include_role: + name: aws-cloud-storage + tasks_from: upload-folder.yml + vars: + local_file_or_folder_path: "{{ cassandra_backup_gzip_file_path }}" + s3_bucket_name: "{{ cloud_storage_dpcassandrabackup_bucketname }}" + s3_path: "{{ cloud_storage_dpcassandrabackup_foldername }}/{{ cassandra_backup_gzip_file_name}}" + aws_default_region: "{{ cloud_public_storage_region }}" + 
aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + when: cloud_service_provider == "aws" + - name: upload file to gcloud storage include_role: name: gcp-cloud-storage tasks_from: upload-batch.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ cassandra_backup_storage }}" - dest_folder_path: "" + gcp_bucket_name: "{{ cloud_storage_dpcassandrabackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpcassandrabackup_foldername }}/{{ cassandra_backup_gzip_file_name}}" local_file_or_folder_path: "{{ cassandra_backup_gzip_file_path }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/cassandra-restore/defaults/main.yml b/ansible/roles/cassandra-restore/defaults/main.yml index 54f332d102..658def4ea8 100644 --- a/ansible/roles/cassandra-restore/defaults/main.yml +++ b/ansible/roles/cassandra-restore/defaults/main.yml @@ -1,8 +1,4 @@ user_home: "/home/{{ ansible_ssh_user }}/" -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -cassandra_backup_storage: "{{ cassandra_backup_azure_container_name }}" +cloud_storage_dpcassandrabackup_bucketname: "{{cloud_storage_management_bucketname}}" +cloud_storage_dpcassandrabackup_foldername: 'cassandra-backup' diff --git a/ansible/roles/cassandra-restore/tasks/main.yml b/ansible/roles/cassandra-restore/tasks/main.yml index 70f4ae92c8..c9dcceb130 100755 --- a/ansible/roles/cassandra-restore/tasks/main.yml +++ b/ansible/roles/cassandra-restore/tasks/main.yml @@ -3,7 +3,6 @@ - set_fact: artifact_path: "/tmp/{{ artifact }}" - artifacts_container: "{{ cassandra_backup_azure_container_name }}" cassandra_restore_dir: /tmp/cassandra_backup - name: download a file from azure storage @@ -12,22 +11,35 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ cassandra_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpcassandrabackup_foldername }}" blob_file_name: "{{ artifact }}" local_file_or_folder_path: "{{ artifact_path }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: download a file from aws s3 + become: true + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + s3_bucket_name: "{{ cloud_storage_dpcassandrabackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ artifact_path }}" + s3_path: "{{ cloud_storage_dpcassandrabackup_foldername }}/{{ artifact }}" + when: cloud_service_provider == "aws" + - name: download file from gcloud 
storage include_role: name: gcp-cloud-storage tasks_from: download.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ artifacts_container }}" - dest_file_name: "{{ artifact }}" - local_file_or_folder_path: "{{ cassandra_restore_dir }}" + gcp_bucket_name: "{{ cloud_storage_dpcassandrabackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpcassandrabackup_foldername }}/{{ artifact }}" + local_file_or_folder_path: "{{ artifact_path }}" when: cloud_service_provider == "gcloud" - name: extract the archive diff --git a/ansible/roles/es-azure-snapshot/tasks/main.yml b/ansible/roles/es-azure-snapshot/tasks/main.yml index 8ce0fcd267..23be535db9 100644 --- a/ansible/roles/es-azure-snapshot/tasks/main.yml +++ b/ansible/roles/es-azure-snapshot/tasks/main.yml @@ -13,8 +13,8 @@ vars: blob_container_name: "{{ es_backup_storage }}" container_public_access: "off" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" - name: Create Azure Repository uri: diff --git a/ansible/roles/gcp-cloud-storage/defaults/main.yml b/ansible/roles/gcp-cloud-storage/defaults/main.yml index 086cf9c50d..b0fd847b26 100644 --- a/ansible/roles/gcp-cloud-storage/defaults/main.yml +++ b/ansible/roles/gcp-cloud-storage/defaults/main.yml @@ -10,8 +10,8 @@ gcp_storage_key_file: "" # Folder name in GCP bucket # Example - -# dest_folder_name: "my-destination-folder" -dest_folder_name: "" +# gcp_path: "my-destination-folder" +gcp_path: "" # The delete pattern to delete files and folder # Example - @@ -36,7 +36,7 @@ dest_file_name: "" # The folder path in gcloud storage to upload the files starting from the root of the bucket # This path should start with / if we provide a value for this variable since we are going to append this path as below -# {{ bucket_name 
}}{{ dest_folder_name }} +# {{ bucket_name }}{{ gcp_path }} # The above translates to "my-bucket/my-folder-path" # Example - # dest_folder_path: "/my-folder/json-files-folder" diff --git a/ansible/roles/gcp-cloud-storage/tasks/download.yml b/ansible/roles/gcp-cloud-storage/tasks/download.yml index c8c6e956ad..73bf76bb04 100644 --- a/ansible/roles/gcp-cloud-storage/tasks/download.yml +++ b/ansible/roles/gcp-cloud-storage/tasks/download.yml @@ -3,9 +3,9 @@ include_tasks: gcloud-auth.yml - name: Download from gcloud storage - shell: gsutil cp "gs://{{ gcp_bucket_name }}/{{ dest_folder_name }}/{{ dest_file_name }}" "{{ local_file_or_folder_path }}" + shell: gsutil cp "gs://{{ gcp_bucket_name }}/{{ gcp_path }}" "{{ local_file_or_folder_path }}" async: 3600 poll: 10 - name: Revoke gcloud access - include_tasks: gcloud-revoke.yml \ No newline at end of file + include_tasks: gcloud-revoke.yml diff --git a/ansible/roles/gcp-cloud-storage/tasks/upload-batch.yml b/ansible/roles/gcp-cloud-storage/tasks/upload-batch.yml index 49abd5b822..dc103969aa 100644 --- a/ansible/roles/gcp-cloud-storage/tasks/upload-batch.yml +++ b/ansible/roles/gcp-cloud-storage/tasks/upload-batch.yml @@ -3,7 +3,7 @@ include_tasks: gcloud-auth.yml - name: Upload files from a local directory gcp storage - shell: gsutil -m cp -r "{{ local_file_or_folder_path }}" "gs://{{ gcp_bucket_name }}/{{ dest_folder_name }}/{{ dest_folder_path }}" + shell: gsutil -m cp -r "{{ local_file_or_folder_path }}" "gs://{{ gcp_bucket_name }}/{{ gcp_path}}" async: 3600 poll: 10 diff --git a/ansible/roles/gcp-cloud-storage/tasks/upload.yml b/ansible/roles/gcp-cloud-storage/tasks/upload.yml index 2f88d9407f..de766a94c7 100644 --- a/ansible/roles/gcp-cloud-storage/tasks/upload.yml +++ b/ansible/roles/gcp-cloud-storage/tasks/upload.yml @@ -3,7 +3,7 @@ include_tasks: gcloud-auth.yml - name: Upload to gcloud storage - shell: gsutil cp "{{ local_file_or_folder_path }}" "gs://{{ gcp_bucket_name }}/{{ dest_folder_name }}/{{ 
dest_file_name }}" + shell: gsutil cp "{{ local_file_or_folder_path }}" "gs://{{ gcp_bucket_name }}/{{ gcp_path }}" async: 3600 poll: 10 diff --git a/ansible/roles/influxdb_backup/defaults/main.yml b/ansible/roles/influxdb_backup/defaults/main.yml index cce66e6683..4104902eb6 100644 --- a/ansible/roles/influxdb_backup/defaults/main.yml +++ b/ansible/roles/influxdb_backup/defaults/main.yml @@ -2,11 +2,6 @@ influxdb_backup_dir: /tmp/influxdb_backup influxdb_backup_databases: ["monitoring_events"] influxdb_backup_file_prefix: influxdb_backup influxdb_backup_file_name: "{{ influxdb_backup_file_prefix }}-{{ ansible_date_time.date }}-{{ ansible_date_time.hour }}-{{ ansible_date_time.minute }}-{{ ansible_date_time.second }}" -azure_influxdb_backup_container: influxdb-backup -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -influxdb_backup_storage: "{{ azure_influxdb_backup_container }}" \ No newline at end of file +cloud_storage_influxdbbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_influxdbbackup_foldername: influxdb-backup diff --git a/ansible/roles/influxdb_backup/tasks/main.yml b/ansible/roles/influxdb_backup/tasks/main.yml index 952100bac5..11370325a0 100644 --- a/ansible/roles/influxdb_backup/tasks/main.yml +++ b/ansible/roles/influxdb_backup/tasks/main.yml @@ -17,10 +17,34 @@ name: azure-cloud-storage tasks_from: upload-using-azcopy.yml vars: - blob_container_name: "{{ influxdb_backup_storage }}" + blob_container_name: "{{ cloud_storage_influxdbbackup_foldername }}" container_public_access: "off" blob_container_folder_path: "" local_file_or_folder_path: "{{ influxdb_backup_dir }}/{{ influxdb_backup_file_name }}.zip" - storage_account_name: "{{ azure_management_storage_account_name }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" storage_account_sas_token: "{{ azure_management_storage_account_sas }}" when: cloud_service_provider == "azure" + +- name: upload file to aws s3 + include_role: + name: aws-cloud-storage + tasks_from: upload.yml + vars: + s3_bucket_name: "{{ cloud_storage_management_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ influxdb_backup_dir }}/{{ influxdb_backup_file_name }}.zip" + s3_path: "{{ cloud_storage_influxdbbackup_foldername }}/{{ influxdb_backup_file_name }}.zip" + when: cloud_service_provider == "aws" + +- name: upload file to gcloud storage + include_role: + name: gcp-cloud-storage + tasks_from: upload.yml + vars: + gcp_bucket_name: "{{ cloud_storage_management_bucketname }}" + gcp_path: "{{ 
cloud_storage_influxdbbackup_foldername }}/{{ influxdb_backup_file_name }}.zip" + local_file_or_folder_path: "{{ influxdb_backup_dir }}/{{ influxdb_backup_file_name }}.zip" + when: cloud_service_provider == "gcloud" + diff --git a/ansible/roles/influxdb_restore/defaults/main.yml b/ansible/roles/influxdb_restore/defaults/main.yml index 332b139439..c865e9ab49 100644 --- a/ansible/roles/influxdb_restore/defaults/main.yml +++ b/ansible/roles/influxdb_restore/defaults/main.yml @@ -1,8 +1,2 @@ -influxdb_backup_azure_container_name: influxdb-backup - -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. After few releases, we will remove the older variables and use only the new variables across the repos -influxdb_backup_storage: "{{ influxdb_backup_azure_container_name }}" +cloud_storage_influxdbbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_influxdbbackup_foldername: influxdb-backup diff --git a/ansible/roles/influxdb_restore/tasks/main.yml b/ansible/roles/influxdb_restore/tasks/main.yml index 1b3bcec299..9c1f3cd436 100644 --- a/ansible/roles/influxdb_restore/tasks/main.yml +++ b/ansible/roles/influxdb_restore/tasks/main.yml @@ -12,13 +12,36 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ influxdb_backup_storage }}" + blob_container_name: "{{ cloud_storage_influxdbbackup_foldername }}" blob_file_name: "{{ influxdb_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ influxdb_restore_file_name }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ 
azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: download a file from aws s3 + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + s3_bucket_name: "{{ cloud_storage_influxdbbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ postgres_restore_dir }}/{{ influxdb_restore_file_name }}" + s3_path: "{{ cloud_storage_influxdbbackup_foldername }}/{{ influxdb_restore_file_name }}" + when: cloud_service_provider == "aws" + +- name: download file from gcloud storage + include_role: + name: gcp-cloud-storage + tasks_from: download.yml + vars: + gcp_bucket_name: "{{ cloud_storage_influxdbbackup_bucketname }}" + gcp_path: "{{ cloud_storage_influxdbbackup_foldername }}/{{ influxdb_restore_file_name }}" + local_file_or_folder_path: "/tmp/{{ influxdb_restore_file_name }}" + when: cloud_service_provider == "gcloud" + - name: Unarchieve backup file unarchive: src: /tmp/{{ influxdb_restore_file_name }} diff --git a/ansible/roles/postgres-managed-service-restore/defaults/main.yml b/ansible/roles/postgres-managed-service-restore/defaults/main.yml index 73f321c0b6..dc1cefe5bc 100644 --- a/ansible/roles/postgres-managed-service-restore/defaults/main.yml +++ b/ansible/roles/postgres-managed-service-restore/defaults/main.yml @@ -1,5 +1,4 @@ postgresql_restore_dir: /tmp/postgres-restore -postgres_backup_azure_container_name: dp-postgresql-backup db: name: ['analytics'] @@ -12,9 +11,5 @@ postgres_user: postgres_password: postgres_hostname: -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. 
In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. After few releases, we will remove the older variables and use only the new variables across the repos -postgres_backup_storage: "{{ postgres_backup_azure_container_name }}" +cloud_storage_dppostgresqlbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dppostgresqlbackup_foldername: dp-postgresql-backup diff --git a/ansible/roles/postgres-managed-service-restore/tasks/main.yml b/ansible/roles/postgres-managed-service-restore/tasks/main.yml index 4067b45f7a..db903740cd 100644 --- a/ansible/roles/postgres-managed-service-restore/tasks/main.yml +++ b/ansible/roles/postgres-managed-service-restore/tasks/main.yml @@ -14,22 +14,34 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ postgres_backup_storage }}" + blob_container_name: "{{ cloud_storage_dppostgresqlbackup_foldername }}" blob_file_name: "{{ postgres_backup_filename }}" local_file_or_folder_path: "{{ postgres_restore_dir }}/{{ postgres_backup_filepath }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: download a file from aws s3 + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + s3_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ 
cloud_public_storage_region }}" + local_file_or_folder_path: "{{ postgres_restore_dir }}/{{ postgres_backup_filepath }}" + s3_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgres_backup_filename }}" + when: cloud_service_provider == "aws" + - name: download file from gcloud storage include_role: name: gcp-cloud-storage tasks_from: download.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ postgres_backup_storage }}" - dest_file_name: "{{ postgres_backup_filename }}" - local_file_or_folder_path: "{{ postgres_backup_filepath }}" + gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgres_backup_filename }}" + local_file_or_folder_path: "{{ postgres_restore_dir }}/{{ postgres_backup_filepath }}" when: cloud_service_provider == "gcloud" - name: unarchive artifact diff --git a/ansible/roles/postgres-managed-service/defaults/main.yml b/ansible/roles/postgres-managed-service/defaults/main.yml index e37800503e..b904c84887 100644 --- a/ansible/roles/postgres-managed-service/defaults/main.yml +++ b/ansible/roles/postgres-managed-service/defaults/main.yml @@ -1,13 +1,7 @@ postgresql_user: postgres postgresql_backup_dir: /tmp/dp-postgres -postgresql_backup_azure_container_name: dp-postgresql-backup - db_name: db: ['analytics'] -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -postgresql_backup_storage: "{{ postgresql_backup_azure_container_name }}" +cloud_storage_dppostgresqlbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dppostgresqlbackup_foldername: dp-postgresql-backup diff --git a/ansible/roles/postgres-managed-service/tasks/main.yml b/ansible/roles/postgres-managed-service/tasks/main.yml index 1b43a4c46d..1a8c69657f 100644 --- a/ansible/roles/postgres-managed-service/tasks/main.yml +++ b/ansible/roles/postgres-managed-service/tasks/main.yml @@ -33,22 +33,34 @@ name: azure-cloud-storage tasks_from: blob-upload.yml vars: - blob_container_name: "{{ postgresql_backup_storage }}" + blob_container_name: "{{ cloud_storage_dppostgresqlbackup_foldername }}" container_public_access: "off" blob_file_name: "{{ postgresql_backup_gzip_file_name }}.zip" local_file_or_folder_path: "{{ postgresql_backup_dir }}/{{ postgresql_backup_gzip_file_name }}.zip" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: upload file to aws s3 + include_role: + name: aws-cloud-storage + tasks_from: upload.yml + vars: + s3_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}" + s3_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}.zip" + when: cloud_service_provider == "aws" + - name: upload file to gcloud storage include_role: name: gcp-cloud-storage tasks_from: 
upload.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ postgresql_backup_storage }}" - dest_file_name: "{{ postgresql_backup_gzip_file_name }}.zip" + gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}.zip" local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/postgresql-backup/defaults/main.yml b/ansible/roles/postgresql-backup/defaults/main.yml index c3deb6684d..a27674128c 100755 --- a/ansible/roles/postgresql-backup/defaults/main.yml +++ b/ansible/roles/postgresql-backup/defaults/main.yml @@ -1,14 +1,5 @@ postgresql_backup_dir: /tmp/postgresql-backup postgresql_user: postgres -postgresql_backup_azure_container_name: postgresql-backup -#postgresql_backup_azure_container_name: "{{ azure_container }}" -postgresql_backup_azure_storage_account_name: "{{sunbird_management_storage_account_name}}" -postgresql_backup_azure_storage_access_key: "{{sunbird_management_storage_account_key}}" - -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -postgresql_backup_storage: "{{ postgresql_backup_azure_container_name }}" +cloud_storage_dppostgresqlbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dppostgresqlbackup_foldername: dp-postgresql-backup diff --git a/ansible/roles/postgresql-backup/tasks/main.yml b/ansible/roles/postgresql-backup/tasks/main.yml index 78d922cefc..ddff9d62bd 100755 --- a/ansible/roles/postgresql-backup/tasks/main.yml +++ b/ansible/roles/postgresql-backup/tasks/main.yml @@ -18,22 +18,34 @@ name: azure-cloud-storage tasks_from: blob-upload.yml vars: - blob_container_name: "{{ postgresql_backup_storage }}" + blob_container_name: "{{ cloud_storage_dppostgresqlbackup_foldername }}" container_public_access: "off" blob_file_name: "{{ postgresql_backup_gzip_file_name }}" local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: upload file to aws s3 + include_role: + name: aws-cloud-storage + tasks_from: upload.yml + vars: + s3_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}" + s3_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}" + when: cloud_service_provider == "aws" + - name: upload file to gcloud storage include_role: name: gcp-cloud-storage tasks_from: upload.yml vars: - gcp_bucket_name: "{{ 
gcloud_management_bucket_name }}" - dest_folder_name: "{{ postgresql_backup_storage }}" - dest_file_name: "{{ postgresql_backup_gzip_file_name }}" + gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_backup_gzip_file_name }}" local_file_or_folder_path: "{{ postgresql_backup_gzip_file_path }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/postgresql-restore/defaults/main.yml b/ansible/roles/postgresql-restore/defaults/main.yml index b839e3b5a9..571a4f59fd 100755 --- a/ansible/roles/postgresql-restore/defaults/main.yml +++ b/ansible/roles/postgresql-restore/defaults/main.yml @@ -3,11 +3,6 @@ postgresql_port: 5432 postgresql_cluster_version: 9.5 postgresql_cluster_name: main postgresql_user: postgres -postgresql_restore_azure_container_name: postgresql-backup -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -postgresql_restore_storage: "{{ postgresql_restore_azure_container_name }}" +cloud_storage_dppostgresqlbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dppostgresqlbackup_foldername: dp-postgresql-backup diff --git a/ansible/roles/postgresql-restore/tasks/main.yml b/ansible/roles/postgresql-restore/tasks/main.yml index 58b10685dc..275061e9f2 100755 --- a/ansible/roles/postgresql-restore/tasks/main.yml +++ b/ansible/roles/postgresql-restore/tasks/main.yml @@ -9,32 +9,33 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ postgresql_restore_storage }}" + blob_container_name: "{{ cloud_storage_dppostgresqlbackup_foldername }}" blob_file_name: "{{ postgresql_restore_gzip_file_name }}" local_file_or_folder_path: "{{ postgresql_restore_gzip_file_path }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" -- name: download file from gcloud storage +- name: download a file from aws s3 include_role: - name: gcp-cloud-storage + name: aws-cloud-storage tasks_from: download.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ postgresql_restore_storage }}" - dest_file_name: "{{ postgresql_restore_gzip_file_name }}" - local_file_or_folder_path: "{{ postgresql_restore_gzip_file_path }}" - when: cloud_service_provider == "gcloud" - + s3_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ 
postgresql_restore_gzip_file_path }}" + s3_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_restore_gzip_file_name }}" + when: cloud_service_provider == "aws" + - name: download file from gcloud storage include_role: name: gcp-cloud-storage tasks_from: download.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ postgresql_restore_storage }}" - dest_file_name: "{{ postgresql_restore_gzip_file_name }}" + gcp_bucket_name: "{{ cloud_storage_dppostgresqlbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dppostgresqlbackup_foldername }}/{{ postgresql_restore_gzip_file_name }}" local_file_or_folder_path: "{{ postgresql_restore_gzip_file_path }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/redis-backup/defaults/main.yml b/ansible/roles/redis-backup/defaults/main.yml index ba0f72de5a..a00387ac74 100644 --- a/ansible/roles/redis-backup/defaults/main.yml +++ b/ansible/roles/redis-backup/defaults/main.yml @@ -1,9 +1,4 @@ redis_backup_dir: /tmp/redis-backup -redis_backup_azure_container_name: dp-redis-backup -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -redis_backup_storage: "{{ redis_backup_azure_container_name }}" +cloud_storage_dpredisbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dpredisbackup_foldername: dp-redis-backup diff --git a/ansible/roles/redis-backup/tasks/main.yml b/ansible/roles/redis-backup/tasks/main.yml index 00a97b2324..061a3414ed 100644 --- a/ansible/roles/redis-backup/tasks/main.yml +++ b/ansible/roles/redis-backup/tasks/main.yml @@ -23,22 +23,34 @@ name: azure-cloud-storage tasks_from: blob-upload.yml vars: - blob_container_name: "{{ redis_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpredisbackup_foldername }}" container_public_access: "off" blob_file_name: "{{ redis_backup_file_name }}" local_file_or_folder_path: "{{ redis_backup_file_path }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: upload file to aws s3 + include_role: + name: aws-cloud-storage + tasks_from: upload.yml + vars: + s3_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "{{ redis_backup_file_path }}" + s3_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_backup_file_name }}" + when: cloud_service_provider == "aws" + - name: upload file to gcloud storage include_role: name: gcp-cloud-storage tasks_from: upload.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ redis_backup_storage }}" - dest_file_name: "{{ redis_backup_file_name }}" + 
gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_backup_file_name }}" local_file_or_folder_path: "{{ redis_backup_file_path }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/redis-multiprocess-backup/defaults/main.yml b/ansible/roles/redis-multiprocess-backup/defaults/main.yml index 5e78a866c8..849e3099bf 100644 --- a/ansible/roles/redis-multiprocess-backup/defaults/main.yml +++ b/ansible/roles/redis-multiprocess-backup/defaults/main.yml @@ -1,5 +1,4 @@ redis_backup_dir: /tmp/redis-backup -redis_backup_azure_container_name: dp-redis-backup redis_data_dir: /data redis: config: @@ -12,9 +11,5 @@ redis: dialcode: name: 'dialcode' -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -redis_backup_storage: "{{ redis_backup_azure_container_name }}" +cloud_storage_dpredisbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dpredisbackup_foldername: dp-redis-backup diff --git a/ansible/roles/redis-multiprocess-backup/tasks/main.yml b/ansible/roles/redis-multiprocess-backup/tasks/main.yml index 343d97b8b1..9e44b18f05 100644 --- a/ansible/roles/redis-multiprocess-backup/tasks/main.yml +++ b/ansible/roles/redis-multiprocess-backup/tasks/main.yml @@ -23,23 +23,35 @@ name: azure-cloud-storage tasks_from: blob-upload.yml vars: - blob_container_name: "{{ redis_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpredisbackup_foldername }}" container_public_access: "off" blob_file_name: "{{ item.value.name }}/{{ redis_backup_file_name }}" local_file_or_folder_path: "{{ redis_backup_dir }}/{{ item.value.name }}/{{ redis_backup_file_name }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" with_dict: "{{ redis.config }}" when: cloud_service_provider == "azure" + +- name: upload backup to S3 + include_role: + name: aws-cloud-storage + tasks_from: upload-folder.yml + vars: + local_file_or_folder_path: "{{ redis_backup_dir }}/*" + s3_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + s3_path: "{{ cloud_storage_dpredisbackup_foldername }}" + aws_default_region: "{{ cloud_public_storage_region }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + when: cloud_service_provider == "aws" - name: upload files to gcloud storage include_role: name: gcp-cloud-storage tasks_from: upload-batch.yml vars: - gcp_bucket_name: "{{ 
gcloud_management_bucket_name }}" - dest_folder_name: "{{ redis_backup_storage }}" - dest_folder_path: "" + gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}" local_file_or_folder_path: "{{ redis_backup_dir }}/*" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/redis-multiprocess-restore/defaults/main.yml b/ansible/roles/redis-multiprocess-restore/defaults/main.yml index a4c0fdce37..84f07feb16 100644 --- a/ansible/roles/redis-multiprocess-restore/defaults/main.yml +++ b/ansible/roles/redis-multiprocess-restore/defaults/main.yml @@ -1,6 +1,5 @@ redis_user: analytics redis_restore_user: analytics -redis_backup_azure_container_name: dp-redis-backup analytics_user_home: /home/analytics device_port: "{{ device_port }}" @@ -8,9 +7,5 @@ user_port: "{{ user_port }}" content_port: "{{ content_port }}" dialcode_port: "{{ dialcode_port }}" -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -redis_backup_storage: "{{ redis_backup_azure_container_name }}" +cloud_storage_dpredisbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dpredisbackup_foldername: dp-redis-backup diff --git a/ansible/roles/redis-multiprocess-restore/tasks/main.yml b/ansible/roles/redis-multiprocess-restore/tasks/main.yml index 73ace7760c..7483f4ac15 100644 --- a/ansible/roles/redis-multiprocess-restore/tasks/main.yml +++ b/ansible/roles/redis-multiprocess-restore/tasks/main.yml @@ -11,22 +11,35 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ redis_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpredisbackup_foldername }}" blob_file_name: "{{ item }}/{{ redis_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ item }}/{{ redis_restore_file_name }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" with_items: "{{ redis_restore_process }}" when: cloud_service_provider == "azure" +- name: download a file from aws s3 + become: true + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + s3_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "/tmp/{{ item }}/{{ redis_restore_file_name }}" + s3_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ item }}/{{ redis_restore_file_name }}" + when: cloud_service_provider == "aws" + - name: download file from gcloud storage include_role: name: gcp-cloud-storage tasks_from: download.yml vars: 
- gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ redis_backup_storage }}" - dest_file_name: "{{ item }}/{{ redis_restore_file_name }}" + gcp_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ item }}/{{ redis_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ item }}/{{ redis_restore_file_name }}" with_items: "{{ redis_restore_process }}" when: cloud_service_provider == "gcloud" diff --git a/ansible/roles/redis-restore/defaults/main.yml b/ansible/roles/redis-restore/defaults/main.yml index 02120d147f..dfa157a310 100644 --- a/ansible/roles/redis-restore/defaults/main.yml +++ b/ansible/roles/redis-restore/defaults/main.yml @@ -1,9 +1,4 @@ -redis_backup_azure_container_name: dp-redis-backup analytics_user_home: /home/analytics -# This variable is added for the below reason - -# 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name -# 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo -# or other default files and just assign the value to the newly introduced common variable -# 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -redis_backup_storage: "{{ redis_backup_azure_container_name }}" +cloud_storage_dpredisbackup_bucketname: "{{ cloud_storage_management_bucketname }}" +cloud_storage_dpredisbackup_foldername: dp-redis-backup diff --git a/ansible/roles/redis-restore/tasks/main.yml b/ansible/roles/redis-restore/tasks/main.yml index 98c3ba4b70..bdab9bd116 100644 --- a/ansible/roles/redis-restore/tasks/main.yml +++ b/ansible/roles/redis-restore/tasks/main.yml @@ -5,20 +5,33 @@ name: azure-cloud-storage tasks_from: blob-download.yml vars: - blob_container_name: "{{ redis_backup_storage }}" + blob_container_name: "{{ cloud_storage_dpredisbackup_foldername }}" blob_file_name: "{{ redis_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ redis_restore_file_name }}" - storage_account_name: "{{ azure_management_storage_account_name }}" - storage_account_key: "{{ azure_management_storage_account_key }}" + storage_account_name: "{{ cloud_management_storage_accountname }}" + storage_account_key: "{{ cloud_management_storage_secret }}" when: cloud_service_provider == "azure" +- name: download file from aws s3 + include_role: + name: aws-cloud-storage + tasks_from: download.yml + vars: + s3_bucket_name: "{{ cloud_storage_dpredisbackup_bucketname }}" + aws_access_key_id: "{{ cloud_management_storage_accountname }}" + aws_secret_access_key: "{{ cloud_management_storage_secret }}" + aws_default_region: "{{ cloud_public_storage_region }}" + local_file_or_folder_path: "/tmp/{{ redis_restore_file_name }}" + s3_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_restore_file_name }}" + when: cloud_service_provider == "aws" + - name: download file from gcloud storage include_role: name: gcp-cloud-storage tasks_from: download.yml vars: - gcp_bucket_name: "{{ gcloud_management_bucket_name }}" - dest_folder_name: "{{ redis_backup_storage }}" + gcp_bucket_name: "{{ 
cloud_storage_dpredisbackup_bucketname }}" + gcp_path: "{{ cloud_storage_dpredisbackup_foldername }}/{{ redis_restore_file_name }}" dest_file_name: "{{ redis_restore_file_name }}" local_file_or_folder_path: "/tmp/{{ redis_restore_file_name }}" when: cloud_service_provider == "gcloud" From aa4982ec7f23be9c5a871980bc8a5d9ef53665da Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 23 Dec 2022 15:48:03 +0530 Subject: [PATCH 11/31] csp migration variables update --- .../templates/job-cluster-taskmanager.yaml | 16 ++++++++-------- .../datapipeline/flink-jobs/values.j2 | 2 +- .../templates/spark-env.j2 | 16 ++++++++-------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml index ecf59608db..0c858aaca2 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-taskmanager.yaml @@ -30,18 +30,18 @@ spec: "-Dtaskmanager.rpc.port=6122", "-Dtaskmanager.heap.size={{ index .Values $job-config-key.task_manager_heap_size }}", {{- if eq .Values.checkpoint_store_type "azure" }} - "-Dfs.azure.account.key.{{ .Values.azure_storage_account }}.blob.core.windows.net: {{ .Values.azure_storage_secret }}", + "-Dfs.azure.account.key.{{ .Values.cloud_storage_key }}.blob.core.windows.net: {{ .Values.cloud_storage_secret }}", {{- end }} {{- if eq .Values.checkpoint_store_type "aws" }} - "-Ds3.access-key={{ .Values.s3_access_key }}", - "-Ds3.secret-key={{ .Values.s3_secret_key }}", - "-Ds3.endpoint={{ .Values.s3_endpoint }}", - "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", + "-Ds3.access-key={{ .Values.cloud_storage_key }}", + "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", + "-Ds3.endpoint={{ .Values.cloud_storage_endpoint }}", + 
"-Ds3.path.style.access={{ .Values.cloud_storage_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} - "-Dfs.gs.auth.client.id={{ .Values.gcloud_client_key }}", - "-Dfs.gs.auth.client.secret={{ .Values.gcloud_private_secret }}", - "-Dfs.gs.project.id={{ .Values.gcloud_project_id }}", + "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", + "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dconfig.file=/opt/flink/conf/{{ .Release.Name }}.conf"] ports: diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index f03a3f2459..8c6861aec5 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -10,7 +10,7 @@ checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} -s3_path_style_access: {{ cloud_storage_path_style_access }} +cloud_storage_path_style_access: {{ cloud_storage_pathstyle_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} telemetry-extractor: diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index 53bf3c3888..48747a71d9 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -70,12 +70,12 @@ export SPARK_WORKER_MEMORY={{ spark.worker.memory }} export SPARK_WORKER_INSTANCES={{ spark.worker.instances }} export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" -export reports_storage_key={{sunbird_private_storage_account_name}} -export 
reports_storage_secret={{sunbird_private_storage_account_key}} -export cloud_private_storage_accountname={{sunbird_private_storage_account_name}} -export cloud_private_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} -export aws_storage_key={{ s3_storage_key }} -export aws_storage_secret={{ s3_storage_secret }} +export reports_storage_key={{cloud_public_storage_accountname}} +export reports_storage_secret={{cloud_public_storage_secret}} +export azure_storage_key={{cloud_private_storage_accountname}} +export azure_storage_secret={{cloud_private_storage_secret}} +export druid_storage_account_key={{cloud_public_storage_accountname}} +export druid_storage_account_secret={{cloud_public_storage_secret}} +export aws_storage_key={{ cloud_public_storage_accountname }} +export aws_storage_secret={{ cloud_public_storage_secret }} From 0b4ac59a70c23f89050d7b2a1510e4fcbcd91551 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 23 Dec 2022 16:03:44 +0530 Subject: [PATCH 12/31] csp migration variables update --- ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml | 4 ++-- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml index 07c02ac1c6..08c834d1bd 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/lpa-telemetry-backup-deploy/defaults/main.yml @@ -2,8 +2,8 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -cloud_storage_telemetry_bucketname: "{{secor_azure_container_name}}" -cloud_private_storage_secret: "{{sunbird_private_storage_account_key}}" +azure_container_name: 
"{{cloud_storage_telemetry_bucketname}}" +azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 7e52ae02e4..5e24a16472 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -7,7 +7,7 @@ checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} -s3_path_style_access: {{ s3_path_style_access }} +s3_path_style_access: {{ cloud_storage_path_style_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} serviceMonitor: From e930804c015f18e3595093a441759bf204628246 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 26 Dec 2022 13:26:58 +0530 Subject: [PATCH 13/31] csp migration variables update --- ansible/roles/analytics-druid/defaults/main.yml | 4 ++-- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index a22a3b9c0e..0c7b03bf33 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -139,7 +139,7 @@ default_druid_configs: s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_endpoint: "{{ cloud_public_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" @@ -213,7 +213,7 @@ default_druid_configs: s3_bucket: "{{ s3_storage_container }}" 
s3_endpoint: "{{ s3_storage_endpoint }}" s3_segment_dir: "druid/rollup/segments" - s3_path_like_access: "{{ cloud_storage_path_style_access }}" + s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" s3_v4_sign_region: "{{ cloud_public_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 5e24a16472..8f4aef3f33 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -7,7 +7,7 @@ checkpoint_store_type: {{ cloud_service_provider }} cloud_storage_key: {{ cloud_public_storage_accountname }} cloud_storage_secret: {{ cloud_public_storage_secret }} cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} -s3_path_style_access: {{ cloud_storage_path_style_access }} +s3_path_style_access: {{ cloud_storage_pathstyle_access }} cloud_storage_project_id: {{ cloud_public_storage_project }} serviceMonitor: From 41e58c17c2ca8be8527e3b6e765afa5007d6b9df Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 26 Dec 2022 15:31:19 +0530 Subject: [PATCH 14/31] csp migration variables update --- .../templates/job-cluster-jobmanager.yaml | 2 +- .../datapipeline/flink-jobs/values.j2 | 8 ++++---- .../roles/analytics-druid/defaults/main.yml | 18 +++++++++--------- .../templates/spark-env.j2 | 12 ++++++------ .../templates/common.conf.j2 | 2 +- .../helm_charts/datapipeline_jobs/values.j2 | 8 ++++---- 6 files changed, 25 insertions(+), 25 deletions(-) diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml index 6b25ab6c53..a8773232bd 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml +++ 
b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/templates/job-cluster-jobmanager.yaml @@ -36,7 +36,7 @@ spec: {{- if eq .Values.checkpoint_store_type "aws" }} "-Ds3.access-key={{ .Values.cloud_storage_key }}", "-Ds3.secret-key={{ .Values.cloud_storage_secret }}", - "-Ds3.endpoint={{ .Values.cloud_public_endpoint }}", + "-Ds3.endpoint={{ .Values.cloud_private_endpoint }}", "-Ds3.path.style.access={{ .Values.s3_path_style_access }}", {{- end }} {{- if eq .Values.checkpoint_store_type "gcloud" }} diff --git a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 index 8c6861aec5..d7c53a44c7 100644 --- a/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 +++ b/ansible/kubernetes/helm_charts/datapipeline/flink-jobs/values.j2 @@ -7,11 +7,11 @@ repository: {{ datapipeline_repository|default('data-pipeline') }} image_tag: {{ image_tag }} checkpoint_store_type: {{ cloud_service_provider }} -cloud_storage_key: {{ cloud_public_storage_accountname }} -cloud_storage_secret: {{ cloud_public_storage_secret }} -cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} +cloud_storage_key: {{ cloud_private_storage_accountname }} +cloud_storage_secret: {{ cloud_private_storage_secret }} +cloud_storage_endpoint: {{ cloud_private_storage_endpoint }} cloud_storage_path_style_access: {{ cloud_storage_pathstyle_access }} -cloud_storage_project_id: {{ cloud_public_storage_project }} +cloud_storage_project_id: {{ cloud_private_storage_project }} telemetry-extractor: job_name=telemetry-extractor diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 0c7b03bf33..2a473660e7 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -126,21 +126,21 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure 
Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ cloud_public_storage_accountname }}" - azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" druid_log_azure_folder: "druidlogs" #Druid S3 Details druid_storage_type: "{{ cloud_storage_telemetry_type }}" - s3_access_key: "{{ cloud_public_storage_accountname }}" - s3_secret_key: "{{ cloud_public_storage_secret }}" + s3_access_key: "{{ cloud_private_storage_accountname }}" + s3_secret_key: "{{ cloud_private_storage_secret }}" s3_bucket: "{{ cloud_storage_telemetry_bucketname }}" - s3_endpoint: "{{ cloud_public_storage_endpoint }}" + s3_endpoint: "{{ cloud_private_storage_endpoint }}" s3_segment_dir: "druid/raw/segments" s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" @@ -200,8 +200,8 @@ default_druid_configs: druid_postgres_user: "{{ druid_postgres_user | default('druid@' + postgres.db_url) }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ cloud_public_storage_accountname }}" - azure_storage_secret: "{{ cloud_public_storage_secret }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure druid_log_azure_container: "{{ cloud_storage_telemetry_bucketname }}" @@ -214,7 +214,7 @@ 
default_druid_configs: s3_endpoint: "{{ s3_storage_endpoint }}" s3_segment_dir: "druid/rollup/segments" s3_path_like_access: "{{ cloud_storage_pathstyle_access }}" - s3_v4_sign_region: "{{ cloud_public_storage_region }}" + s3_v4_sign_region: "{{ cloud_private_storage_region }}" #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" diff --git a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 index 48747a71d9..bb8a92abf0 100644 --- a/ansible/roles/analytics-spark-provision/templates/spark-env.j2 +++ b/ansible/roles/analytics-spark-provision/templates/spark-env.j2 @@ -70,12 +70,12 @@ export SPARK_WORKER_MEMORY={{ spark.worker.memory }} export SPARK_WORKER_INSTANCES={{ spark.worker.instances }} export SPARK_EXECUTOR_MEMORY={{ spark.executor.memory }} export SPARK_PUBLIC_DNS="{{ spark.public_dns }}" -export reports_storage_key={{cloud_public_storage_accountname}} -export reports_storage_secret={{cloud_public_storage_secret}} +export reports_storage_key={{cloud_private_storage_accountname}} +export reports_storage_secret={{cloud_private_storage_secret}} export azure_storage_key={{cloud_private_storage_accountname}} export azure_storage_secret={{cloud_private_storage_secret}} -export druid_storage_account_key={{cloud_public_storage_accountname}} -export druid_storage_account_secret={{cloud_public_storage_secret}} -export aws_storage_key={{ cloud_public_storage_accountname }} -export aws_storage_secret={{ cloud_public_storage_secret }} +export druid_storage_account_key={{cloud_private_storage_accountname}} +export druid_storage_account_secret={{cloud_private_storage_secret}} +export aws_storage_key={{ cloud_private_storage_accountname }} +export aws_storage_secret={{ cloud_private_storage_secret }} diff --git a/ansible/roles/data-products-deploy/templates/common.conf.j2 
b/ansible/roles/data-products-deploy/templates/common.conf.j2 index 24159515e3..30f4a94599 100644 --- a/ansible/roles/data-products-deploy/templates/common.conf.j2 +++ b/ansible/roles/data-products-deploy/templates/common.conf.j2 @@ -18,7 +18,7 @@ reports.storage.secret.config="{{ dp_reports_storage_secret_config }}" cloud_storage_telemetry_type="azure" {% elif (dp_object_store_type == "cephs3" or dp_object_store_type == "s3") %} cloud_storage_telemetry_type="s3" -cloud_public_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" +cloud_private_storage_endpoint="{{ s3_storage_endpoint | regex_replace('^[a-z]+://(.*)$', '\\1') }}" cloud_storage_endpoint_with_protocol="{{ s3_storage_endpoint }}" aws_storage_key="{{ s3_storage_key }}" aws_storage_secret="{{ s3_storage_secret }}" diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 8f4aef3f33..29c1373a8e 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -4,11 +4,11 @@ dockerhub: {{ dockerhub }} repository: {{flink_repository|default('sunbird-datapipeline')}} image_tag: {{ image_tag }} checkpoint_store_type: {{ cloud_service_provider }} -cloud_storage_key: {{ cloud_public_storage_accountname }} -cloud_storage_secret: {{ cloud_public_storage_secret }} -cloud_storage_endpoint: {{ cloud_public_storage_endpoint }} +cloud_storage_key: {{ cloud_private_storage_accountname }} +cloud_storage_secret: {{ cloud_private_storage_secret }} +cloud_storage_endpoint: {{ cloud_private_storage_endpoint }} s3_path_style_access: {{ cloud_storage_pathstyle_access }} -cloud_storage_project_id: {{ cloud_public_storage_project }} +cloud_storage_project_id: {{ cloud_private_storage_project }} serviceMonitor: enabled: {{ service_monitor_enabled | lower}} From aceeb5d6e459746da834f6d7f494428ab00add4c Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 26 Dec 2022 
17:00:49 +0530 Subject: [PATCH 15/31] csp migration variables update --- ansible/inventory/env/group_vars/all.yml | 8 +++---- ansible/lpa_data-products_deploy.yml | 4 ++-- .../analytics-bootstrap-spark/tasks/main.yml | 21 ++++++++++--------- .../templates/create-cluster.sh.j2 | 4 ++-- .../defaults/main.yml | 4 ++-- .../templates/conf/DialcodeRedisIndexer.j2 | 4 ++-- .../templates/cluster-config.json.j2 | 4 ++-- .../templates/start-jobmanager.j2 | 12 +++++------ .../templates/secor.azure.j2 | 6 +++--- .../tasks/main.yml | 4 ++-- .../defaults/main.yml | 2 +- .../templates/secor.azure.j2 | 4 ++-- ansible/spark-cluster-job-submit.yml | 4 ++-- .../roles/flink-jobs-deploy/defaults/main.yml | 4 ++-- .../helm_charts/druid-cluster/values.j2 | 4 ++-- kubernetes/helm_charts/secor/values.j2 | 4 ++-- 16 files changed, 47 insertions(+), 46 deletions(-) diff --git a/ansible/inventory/env/group_vars/all.yml b/ansible/inventory/env/group_vars/all.yml index 13114d01d9..f945a09f41 100644 --- a/ansible/inventory/env/group_vars/all.yml +++ b/ansible/inventory/env/group_vars/all.yml @@ -10,8 +10,8 @@ channel_data_exhaust_bucket: dev-data-store secrets_path: '{{inventory_dir}}/secrets.yml' artifacts_container: "{{dp_vault_artifacts_container}}" -report_azure_account_name: "{{sunbird_private_storage_account_name}}" -report_azure_storage_secret: "{{sunbird_private_storage_account_key}}" +report_azure_account_name: "{{cloud_private_storage_accountname}}" +report_azure_storage_secret: "{{cloud_private_storage_secret}}" redis_host: "{{ groups['redis'][0] }}" metadata_redis_host: "{{ groups['redis'][0] }}" @@ -50,8 +50,8 @@ secor: artifact_dir: /mount/secor artifact_ver: "0.29" azure: - account_name: "{{sunbird_private_storage_account_name}}" - account_key: "{{sunbird_private_storage_account_key}}" + account_name: "{{cloud_private_storage_accountname}}" + account_key: "{{cloud_private_storage_secret}}" container_name: "{{channel_data_exhaust_bucket}}" paths: ['/mount/secor', 
'/mount/secor/reports', '/mount/secor/logs', '/home/analytics/sbin', '/mount/data/analytics'] channel: "{{secor_alerts_slack_channel}}" diff --git a/ansible/lpa_data-products_deploy.yml b/ansible/lpa_data-products_deploy.yml index 7e84ba53f7..99eab0c448 100644 --- a/ansible/lpa_data-products_deploy.yml +++ b/ansible/lpa_data-products_deploy.yml @@ -5,7 +5,7 @@ become: yes become_user: "{{ analytics_user }}" environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" - AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + AZURE_STORAGE_ACCOUNT: "{{cloud_private_storage_accountname}}" + AZURE_STORAGE_KEY: "{{cloud_private_storage_secret}}" roles: - data-products-deploy diff --git a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml index 13ba75f78a..c6c9bfe7ff 100644 --- a/ansible/roles/analytics-bootstrap-spark/tasks/main.yml +++ b/ansible/roles/analytics-bootstrap-spark/tasks/main.yml @@ -8,8 +8,8 @@ line: 'export {{item.var}}={{item.value}}' regexp: "export {{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'} + - {var: 'azure_storage_key', value: '{{ cloud_private_storage_accountname }}'} + - {var: 'azure_storage_secret', value: '{{ cloud_private_storage_secret }}'} - name: Adding ENV Vars to spark servers environment. 
become: yes @@ -18,20 +18,21 @@ line: '{{item.var}}={{item.value}}' regexp: "{{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'} - - {var: 'AZURE_STORAGE_ACCOUNT', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'AZURE_STORAGE_ACCESS_KEY', value: '{{ sunbird_private_storage_account_key }}'} - - {var: 'PUBLIC_AZURE_STORAGE_ACCOUNT', value: '{{ sunbird_public_storage_account_name }}'} - - {var: 'PUBLIC_AZURE_STORAGE_ACCESS_KEY', value: '{{ sunbird_public_storage_account_key }}'} - - {var: 'PRIVATE_REPORT_CONTAINER', value: '{{ sunbird_private_azure_report_container_name }}'} - - {var: 'PUBLIC_REPORT_CONTAINER', value: '{{ sunbird_public_azure_report_container_name }}'} + - {var: 'azure_storage_key', value: '{{ cloud_private_storage_accountname }}'} + - {var: 'azure_storage_secret', value: '{{ cloud_private_storage_secret }}'} + - {var: 'AZURE_STORAGE_ACCOUNT', value: '{{ cloud_private_storage_accountname }}'} + - {var: 'AZURE_STORAGE_ACCESS_KEY', value: '{{ cloud_private_storage_secret }}'} + - {var: 'PUBLIC_AZURE_STORAGE_ACCOUNT', value: '{{ cloud_public_storage_accountname }}'} + - {var: 'PUBLIC_AZURE_STORAGE_ACCESS_KEY', value: '{{ cloud_public_storage_secret }}'} + - {var: 'PRIVATE_REPORT_CONTAINER', value: '{{ cloud_storage_privatereports_bucketname }}'} + - {var: 'PUBLIC_REPORT_CONTAINER', value: '{{ cloud_storage_publicreports_bucketname }}'} - {var: 'REPORT_BACKUP_CONTAINER', value: 'portal-reports-backup'} - {var: 'GOOGLE_CREDENTIALS_PATH', value: '/home/analytics/credentials'} - {var: 'STORAGE_PROVIDER', value: 'AZURE'} - {var: 'ENV', value: '{{env}}'} - {var: 'KAFKA_BROKER_HOST', value: "{{groups['processing-cluster-kafka'][0]}}:9092"} + - name: Install required python packages become: yes action: apt pkg={{ item }} state=present update_cache=yes diff --git 
a/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 b/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 index 8a7703c936..76b4c45585 100644 --- a/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 +++ b/ansible/roles/azure-hdinsight-spark-cluster/templates/create-cluster.sh.j2 @@ -8,8 +8,8 @@ headnode_size="{{headnode_size}}" location="{{location}}" http_user=admin http_password="{{azure_spark_cluster_http_password}}" -storage_account_name="{{sunbird_private_storage_account_name}}" -storage_account_key="{{sunbird_private_storage_account_key}}" +storage_account_name="{{cloud_private_storage_accountname}}" +storage_account_key="{{cloud_private_storage_secret}}" storage_container="{{spark_storage_container}}" subnet_name="{{subnet_name}}" vnet_name="{{vnet_name}}" diff --git a/ansible/roles/content-snapshot-indexer/defaults/main.yml b/ansible/roles/content-snapshot-indexer/defaults/main.yml index 40ba1ef8dd..db47b85580 100644 --- a/ansible/roles/content-snapshot-indexer/defaults/main.yml +++ b/ansible/roles/content-snapshot-indexer/defaults/main.yml @@ -46,8 +46,8 @@ cloud_storage: container: "telemetry-data-store" # Container is different in all env so override this. object_key: "druid-content-snapshot/snapshot.txt" provider: "azure" - account_name: "{{sunbird_public_storage_account_name}}" - account_key: "{{sunbird_public_storage_account_key}}" + account_name: "{{cloud_public_storage_accountname}}" + account_key: "{{cloud_public_storage_secret}}" cassandra: host: "{{lp_cassandra_host}}" ## LMS-Cassandra IP Address. 
diff --git a/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 b/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 index 337759f5d1..0aafd7cb74 100644 --- a/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 +++ b/ansible/roles/content-snapshot-indexer/templates/conf/DialcodeRedisIndexer.j2 @@ -10,7 +10,7 @@ redis.max.pipeline.size="{{ redis.max_pipeline_size }}" redis.dialcode.database.index=6 -cloudStorage.accountName="{{sunbird_private_storage_account_name}}" -cloudStorage.accountKey="{{sunbird_private_storage_account_key}}" +cloudStorage.accountName="{{cloud_private_storage_accountname}}" +cloudStorage.accountKey="{{cloud_private_storage_secret}}" cloudStorage.container="{{ bucket | default('telemetry-data-store') }}" cloudStorage.dialCodeDataFile="dialcode-data/dial_code.csv" diff --git a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 index f0d4e57ae6..26f4e016d6 100644 --- a/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 +++ b/ansible/roles/data-products-deploy/templates/cluster-config.json.j2 @@ -44,7 +44,7 @@ "spark.cassandra.connection.timeoutMS" : "{{ spark_cassandra_connection_timeout_millis }}", "spark.cassandra.read.timeoutMS" : "{{ spark_cassandra_query_timeout_millis }}", "spark.cassandra.input.fetch.sizeInRows": "{{ spark_cassandra_query_max_rows_fetch_count }}", - "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl 
-XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}", - "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ sunbird_private_storage_account_key }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{sunbird_private_storage_account_key}} -Ddruid_storage_account_key={{ sunbird_public_storage_account_name }} -Ddruid_storage_account_secret={{sunbird_public_storage_account_key}}" + "spark.driver.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} 
-Dazure_storage_secret={{ cloud_private_storage_secret }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{cloud_private_storage_secret}} -Ddruid_storage_account_key={{ cloud_public_storage_accountname }} -Ddruid_storage_account_secret={{cloud_public_storage_secret}}", + "spark.executor.extraJavaOptions": "-Detwlogger.component=sparkdriver -DlogFilter.filename=SparkLogFilters.xml -DpatternGroup.filename=SparkPatternGroups.xml -Dlog4jspark.root.logger=INFO,console,RFA,ETW,Anonymizer,org.ekstep.analytics -Dlog4jspark.log.dir=/var/log/sparkapp/${user.name} -Dlog4jspark.log.file=sparkdriver.log -Dlog4j.configuration=file:/usr/hdp/current/spark2-client/conf/log4j.properties -Djavax.xml.parsers.SAXParserFactory=com.sun.org.apache.xerces.internal.jaxp.SAXParserFactoryImpl -XX:+UseParallelGC -XX:+UseParallelOldGC -Dazure_storage_key={{ cloud_private_storage_accountname }} -Dazure_storage_secret={{ cloud_private_storage_secret }} -Dreports_storage_key={{cloud_private_storage_accountname}} -Dreports_storage_secret={{cloud_private_storage_secret}} -Ddruid_storage_account_key={{ cloud_public_storage_accountname }} -Ddruid_storage_account_secret={{cloud_public_storage_secret}}" } } diff --git a/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 b/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 index 2e613b9866..a3a156cfda 100644 --- a/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 +++ b/ansible/roles/data-products-deploy/templates/start-jobmanager.j2 @@ -5,12 +5,12 @@ export DP_LOGS={{ analytics.home }}/logs/data-products export SERVICE_LOGS={{ analytics.home }}/logs/services export JM_HOME={{ analytics.home }}/job-manager -export azure_storage_key={{sunbird_private_storage_account_name}} -export azure_storage_secret={{sunbird_private_storage_account_key}} -export reports_azure_storage_key={{sunbird_private_storage_account_name}} -export 
reports_azure_storage_secret={{sunbird_private_storage_account_key}} -export druid_storage_account_key={{sunbird_public_storage_account_name}} -export druid_storage_account_secret={{sunbird_public_storage_account_key}} +export azure_storage_key={{cloud_private_storage_accountname}} +export azure_storage_secret={{cloud_private_storage_secret}} +export reports_azure_storage_key={{cloud_private_storage_accountname}} +export reports_azure_storage_secret={{cloud_private_storage_secret}} +export druid_storage_account_key={{cloud_public_storage_accountname}} +export druid_storage_account_secret={{cloud_public_storage_secret}} export heap_conf_str={{ spark.heap_conf_str }} today=$(date "+%Y-%m-%d") diff --git a/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 b/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 index f44174563c..b2acc84e37 100644 --- a/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 +++ b/ansible/roles/lpa-telemetry-backup-deploy/templates/secor.azure.j2 @@ -12,12 +12,12 @@ secor.azure.endpoints.protocol=https # Microsoft Azure authentication credentials. # https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account -secor.azure.account.name={{sunbird_private_storage_account_name}} -secor.azure.account.key={{sunbird_private_storage_account_key}} +secor.azure.account.name={{cloud_private_storage_accountname}} +secor.azure.account.key={{cloud_private_storage_secret}} # Microsoft Azure blob storage container name. Container is a grouping of a set # of blobs. https://msdn.microsoft.com/en-us/library/dd135715.aspx -secor.azure.container.name={{ azure_container_name }} +secor.azure.container.name={{ cloud_storage_telemetry_bucketname }} # Microsoft Azure blob storage path where files are stored within the container. 
secor.azure.path={{ secor_service_name[item[0]].base_path }} diff --git a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml index 5203a56b00..8890f132be 100644 --- a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml +++ b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml @@ -4,8 +4,8 @@ line: '{{item.var}}={{item.value}}' regexp: "{{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ sunbird_private_storage_account_name }}'} - - {var: 'azure_storage_secret', value: '{{ sunbird_private_storage_account_key }}'} + - {var: 'azure_storage_key', value: '{{ cloud_private_storage_accountname }}'} + - {var: 'azure_storage_secret', value: '{{ cloud_private_storage_secret }}'} no_log: true - name: Remove guava-jre, guice default jars diff --git a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml index a9edd50a3c..2eb76e196c 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml @@ -3,7 +3,7 @@ analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" azure_container_name: "{{secor_azure_container_name}}" -azure_account_key: "{{sunbird_private_storage_account_key}}" +azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" diff --git a/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 b/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 index f44174563c..e55aa469c0 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 +++ b/ansible/roles/secor-telemetry-backup-deploy/templates/secor.azure.j2 @@ -12,8 +12,8 @@ secor.azure.endpoints.protocol=https # Microsoft Azure authentication credentials. 
# https://azure.microsoft.com/en-us/documentation/articles/storage-create-storage-account -secor.azure.account.name={{sunbird_private_storage_account_name}} -secor.azure.account.key={{sunbird_private_storage_account_key}} +secor.azure.account.name={{cloud_private_storage_accountname}} +secor.azure.account.key={{cloud_private_storage_secret}} # Microsoft Azure blob storage container name. Container is a grouping of a set # of blobs. https://msdn.microsoft.com/en-us/library/dd135715.aspx diff --git a/ansible/spark-cluster-job-submit.yml b/ansible/spark-cluster-job-submit.yml index ba4e017a23..f66451ae32 100644 --- a/ansible/spark-cluster-job-submit.yml +++ b/ansible/spark-cluster-job-submit.yml @@ -4,8 +4,8 @@ vars_files: - "{{inventory_dir}}/secrets.yml" environment: - AZURE_STORAGE_ACCOUNT: "{{sunbird_private_storage_account_name}}" - AZURE_STORAGE_KEY: "{{sunbird_private_storage_account_key}}" + AZURE_STORAGE_ACCOUNT: "{{cloud_private_storage_accountname}}" + AZURE_STORAGE_KEY: "{{cloud_private_storage_secret}}" roles: - data-products-deploy diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index 633026b825..e371c56817 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -30,8 +30,8 @@ taskmanager_heap_memory: 1024 ### base-config related vars postgres_max_connections: 2 -azure_account: "{{ sunbird_private_storage_account_name }}" -azure_secret: "{{ sunbird_private_storage_account_key }}" +azure_account: "{{ cloud_private_storage_accountname }}" +azure_secret: "{{ cloud_private_storage_secret }}" flink_container_name: dev-data-store flink_dp_storage_container: "" checkpoint_store_type: azure diff --git a/kubernetes/helm_charts/druid-cluster/values.j2 b/kubernetes/helm_charts/druid-cluster/values.j2 index 169ef07b8e..bacc6bc321 100644 --- a/kubernetes/helm_charts/druid-cluster/values.j2 +++ 
b/kubernetes/helm_charts/druid-cluster/values.j2 @@ -24,8 +24,8 @@ druid_metadata_storage_connector_password: {{ druid_configs[cluster_type].druid_ # Druid Storage Type druid_deepstorage_type: azure -druid_azure_storage_account: "{{ sunbird_private_storage_account_name }}" -druid_azure_storage_account_key: "{{ sunbird_private_storage_account_key }}" +druid_azure_storage_account: "{{ cloud_private_storage_accountname }}" +druid_azure_storage_account_key: "{{ cloud_private_storage_secret }}" druid_azure_container: "{{ druid_configs[cluster_type].azure_container }}" # Indexing service logs diff --git a/kubernetes/helm_charts/secor/values.j2 b/kubernetes/helm_charts/secor/values.j2 index 4aa2e0ee83..2d1512cbed 100644 --- a/kubernetes/helm_charts/secor/values.j2 +++ b/kubernetes/helm_charts/secor/values.j2 @@ -1,5 +1,5 @@ -azure_account: "{{ sunbird_private_storage_account_name }}" -azure_secret: "{{ sunbird_private_storage_account_key }}" +azure_account: "{{ cloud_private_storage_accountname }}" +azure_secret: "{{ cloud_private_storage_secret }}" azure_container_name: "telemetry-data-store" namespace: {{ secor_namespace }} From fb462991181c7c1c9c06ad6761500c7cbd6c16a4 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 26 Dec 2022 17:08:03 +0530 Subject: [PATCH 16/31] csp migration variables update --- ansible/roles/analytics-druid/defaults/main.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index 2a473660e7..c16c4cc0d9 100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -31,7 +31,7 @@ druid_request_logging_type: "file" #Druid Extensions -druid_storage_type: {{ cloud_storage_telemetry_type }} +druid_storage_type: "{{ cloud_storage_telemetry_type }}" druid_extensions_list : '"druid-azure-extensions", "graphite-emitter", "postgresql-metadata-storage", 
"druid-kafka-indexing-service", "druid-datasketches"' @@ -40,7 +40,7 @@ druid_community_extensions: # End of druid_extensions -druid_indexing_logs_type: {{ cloud_storage_telemetry_type }} +druid_indexing_logs_type: "{{ cloud_storage_telemetry_type }}" druid_indexing_log_dir: /var/druid/indexing-logs druid_indexing_storage_type : metadata druid_indexing_task_basedir : "/var/task" From adb278337c5c6b0d9adf5a1cb92a9d6a2c8ede64 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Fri, 30 Dec 2022 13:37:50 +0530 Subject: [PATCH 17/31] csp migration variables update --- ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml index 2eb76e196c..f31781473e 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml @@ -2,7 +2,7 @@ analytics_user: analytics analytics_group: analytics analytics_user_home: /home/{{analytics_user}} sbin_path: "{{ analytics_user_home }}/sbin" -azure_container_name: "{{secor_azure_container_name}}" +azure_container_name: "{{cloud_storage_telemetry_bucketname}}" azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" From 5faa25b0c05f202958bad928a41fcea0872b47b2 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Tue, 17 Jan 2023 15:56:57 +0530 Subject: [PATCH 18/31] csp migration variables update --- ansible/roles/analytics-druid/defaults/main.yml | 14 ++++++++++++++ .../templates/_common/common.runtime.properties | 7 +++++++ .../templates/flink_job_deployment.yaml | 4 ++-- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 4 +++- 4 files changed, 26 insertions(+), 3 deletions(-) diff --git a/ansible/roles/analytics-druid/defaults/main.yml b/ansible/roles/analytics-druid/defaults/main.yml index c16c4cc0d9..851294c080 
100644 --- a/ansible/roles/analytics-druid/defaults/main.yml +++ b/ansible/roles/analytics-druid/defaults/main.yml @@ -144,6 +144,13 @@ default_druid_configs: #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/raw/stage/indexing_logs" + #Druid GCS Details + gcloud_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_prefix: "" + gcloud_max_list_length: 1024 + #Logging the indexing logs to GCS + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/raw/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m druid_coordinator_period: PT30S @@ -218,6 +225,13 @@ default_druid_configs: #Logging the indexing logs to s3 s3_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" s3_indexer_logs_dir: "druid/rollup/stage/indexing_logs" + #Druid GCS Details + druid.google.bucket: "{{ cloud_storage_telemetry_bucketname }}" + druid.google.prefix: "" + druid.google.maxListingLength: 1024 + #Logging the indexing logs to gcs + gcloud_logging_bucket: "{{ cloud_storage_telemetry_bucketname }}" + gcloud_indexer_logs_dir: "druid/rollup/stage/indexing_logs" #Druid coordinator node configuration druid_coordinator_heap_size: 128m druid_coordinator_period: PT30S diff --git a/ansible/roles/analytics-druid/templates/_common/common.runtime.properties b/ansible/roles/analytics-druid/templates/_common/common.runtime.properties index 8a047b09ff..be217e65c8 100644 --- a/ansible/roles/analytics-druid/templates/_common/common.runtime.properties +++ b/ansible/roles/analytics-druid/templates/_common/common.runtime.properties @@ -70,6 +70,10 @@ druid.s3.endpoint.signingRegion={{ druid_configs[cluster].s3_v4_sign_region }} # druid.storage.sse.type=s3 # uncomment to disable chunk encoding # druid.s3.disableChunkedEncoding=true +{% elif druid_storage_type == "google" %} +druid.google.bucket={{ druid_configs[cluster].gcloud_bucket }} 
+druid.google.prefix={{ druid_configs[cluster].gcloud_prefix }} +druid.google.maxListingLength={{ druid_configs[cluster].gcloud_max_list_length }} {% endif %} # Indexing service logs @@ -83,6 +87,9 @@ druid.indexer.logs.prefix= {{ druid_configs[cluster].druid_log_azure_folder }} druid.indexer.logs.s3Bucket={{ druid_configs[cluster].s3_logging_bucket }} # path to logs within the bucker druid.indexer.logs.s3Prefix={{ druid_configs[cluster].s3_indexer_logs_dir }} +{% elif druid_indexing_logs_type == "google" %} +druid.indexer.logs.bucket={{ druid_configs[cluster].gcloud_logging_bucket }} +druid.indexer.logs.prefix={{ druid_configs[cluster].gcloud_indexer_logs_dir }} {% endif %} # Service discovery diff --git a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml index fb3cb0dee8..35d9066161 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml +++ b/kubernetes/helm_charts/datapipeline_jobs/templates/flink_job_deployment.yaml @@ -122,7 +122,7 @@ spec: {{- if eq .Values.checkpoint_store_type "gcloud" }} "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", - "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", @@ -208,7 +208,7 @@ spec: {{- if eq .Values.checkpoint_store_type "gcloud" }} "-Dfs.gs.auth.client.id={{ .Values.cloud_storage_key }}", "-Dfs.gs.auth.client.secret={{ .Values.cloud_storage_secret }}", - "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}" + "-Dfs.gs.project.id={{ .Values.cloud_storage_project_id }}", {{- end }} "-Dweb.submit.enable=false", "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter", diff --git 
a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index 29c1373a8e..a2fe113117 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -149,7 +149,7 @@ base_config: | statebackend { blob { storage { - account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% endif %}" + account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% elif checkpoint_store_type == "gcloud" %}{{ flink_dp_storage_container }}{% endif %}" container = "{{ flink_container_name }}" checkpointing.dir = "checkpoint" } @@ -158,6 +158,8 @@ base_config: | base.url = "wasbs://"${job.statebackend.blob.storage.container}"@"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.checkpointing.dir} {% elif checkpoint_store_type == "s3" %} base.url = "s3://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} +{% elif checkpoint_store_type == "gcloud" %} + base.url = "gs://"${job.statebackend.blob.storage.account}"/"${job.statebackend.blob.storage.container}"/"${job.statebackend.blob.storage.checkpointing.dir} {% endif %} } } From 5a24334369227aedfc109b3f738fd0698e96708c Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 18 Jan 2023 12:12:08 +0530 Subject: [PATCH 19/31] csp migration variables update --- kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index e371c56817..9bb98bbf4c 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ 
b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -1,6 +1,6 @@ flink_namespace: "flink-{{ env }}" imagepullsecrets: "{{ env }}-registry-secret" -service_monitor_enabled: true +service_monitor_enabled: false ### Job manager related vars jobmanager_rpc_port: 6123 jobmanager_blob_port: 6124 From d575aecca5c4909a19ab88acbd99a053e4d79b50 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Wed, 18 Jan 2023 12:18:42 +0530 Subject: [PATCH 20/31] csp migration variables update --- kubernetes/helm_charts/datapipeline_jobs/values.j2 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/helm_charts/datapipeline_jobs/values.j2 b/kubernetes/helm_charts/datapipeline_jobs/values.j2 index a2fe113117..06042eddff 100644 --- a/kubernetes/helm_charts/datapipeline_jobs/values.j2 +++ b/kubernetes/helm_charts/datapipeline_jobs/values.j2 @@ -150,7 +150,7 @@ base_config: | blob { storage { account = "{% if checkpoint_store_type == "azure" %}{{ azure_account }}.blob.core.windows.net{% elif checkpoint_store_type == "s3" %}{{ flink_dp_storage_container }}{% elif checkpoint_store_type == "gcloud" %}{{ flink_dp_storage_container }}{% endif %}" - container = "{{ flink_container_name }}" + container = "{{ cloud_storage_flink_bucketname }}" checkpointing.dir = "checkpoint" } } From 214c977d2abc43c8af2c048b0bb3364c1ad93516 Mon Sep 17 00:00:00 2001 From: Sadanand <100120230+SadanandGowda@users.noreply.github.com> Date: Wed, 18 Jan 2023 13:04:24 +0530 Subject: [PATCH 21/31] Update main.yml --- kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index 9bb98bbf4c..43b7aac364 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -34,7 +34,7 @@ azure_account: "{{ 
cloud_private_storage_accountname }}" azure_secret: "{{ cloud_private_storage_secret }}" flink_container_name: dev-data-store flink_dp_storage_container: "" -checkpoint_store_type: azure +checkpoint_store_type: gcloud checkpoint_interval: 60000 checkpoint_pause_between_seconds: 5000 checkpoint_compression_enabled: true From f4696c792490f32796828cafd1a02a8b31efa209 Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 6 Nov 2023 16:53:43 +0530 Subject: [PATCH 22/31] Updated cloud service provider changes --- ansible/inventory/env/group_vars/all.yml | 6 +++--- .../roles/artifacts-download-azure/tasks/main.yml | 4 ++-- .../roles/artifacts-upload-azure/tasks/main.yml | 4 ++-- ansible/roles/es-azure-snapshot/defaults/main.yml | 4 ++-- kubernetes/ansible/roles/druid/defaults/main.yml | 14 +++++++------- .../roles/flink-jobs-deploy/defaults/main.yml | 2 +- 6 files changed, 17 insertions(+), 17 deletions(-) diff --git a/ansible/inventory/env/group_vars/all.yml b/ansible/inventory/env/group_vars/all.yml index f945a09f41..c7d0c3ee8c 100644 --- a/ansible/inventory/env/group_vars/all.yml +++ b/ansible/inventory/env/group_vars/all.yml @@ -6,9 +6,9 @@ log4j_appender_kafka_topic: "{{env}}.telemetry.backend" # Shall we change the value to telemetry-data-store in dev also? # What's the implication # Is it azure blob or s3 bucket?? 
-channel_data_exhaust_bucket: dev-data-store +cloud_storage_telemetry_bucketname: dev-data-store secrets_path: '{{inventory_dir}}/secrets.yml' -artifacts_container: "{{dp_vault_artifacts_container}}" +cloud_storage_artifacts_bucketname: "{{dp_vault_artifacts_container}}" report_azure_account_name: "{{cloud_private_storage_accountname}}" report_azure_storage_secret: "{{cloud_private_storage_secret}}" @@ -52,7 +52,7 @@ secor: azure: account_name: "{{cloud_private_storage_accountname}}" account_key: "{{cloud_private_storage_secret}}" - container_name: "{{channel_data_exhaust_bucket}}" + container_name: "{{cloud_storage_telemetry_bucketname}}" paths: ['/mount/secor', '/mount/secor/reports', '/mount/secor/logs', '/home/analytics/sbin', '/mount/data/analytics'] channel: "{{secor_alerts_slack_channel}}" diff --git a/ansible/roles/artifacts-download-azure/tasks/main.yml b/ansible/roles/artifacts-download-azure/tasks/main.yml index db79bc213f..25fac02a27 100644 --- a/ansible/roles/artifacts-download-azure/tasks/main.yml +++ b/ansible/roles/artifacts-download-azure/tasks/main.yml @@ -1,8 +1,8 @@ --- - name: Ensure azure blob storage container exists - command: az storage container exists --name {{ artifacts_container }} + command: az storage container exists --name {{ cloud_storage_artifacts_bucketname }} - name: Download from azure blob storage - command: az storage blob download -c {{ artifacts_container }} --name {{ artifact }} -f {{ artifact_path }} + command: az storage blob download -c {{ cloud_storage_artifacts_bucketname }} --name {{ artifact }} -f {{ artifact_path }} async: 3600 poll: 10 diff --git a/ansible/roles/artifacts-upload-azure/tasks/main.yml b/ansible/roles/artifacts-upload-azure/tasks/main.yml index 785dc1a455..91e66a5eec 100644 --- a/ansible/roles/artifacts-upload-azure/tasks/main.yml +++ b/ansible/roles/artifacts-upload-azure/tasks/main.yml @@ -1,8 +1,8 @@ --- - name: Ensure azure blob storage container exists - command: az storage container create 
--name {{ artifacts_container }} + command: az storage container create --name {{ cloud_storage_artifacts_bucketname }} - name: Upload to azure blob storage - command: az storage blob upload -c {{ artifacts_container }} --name {{ artifact }} -f {{ artifact_path }} + command: az storage blob upload -c {{ cloud_storage_artifacts_bucketname }} --name {{ artifact }} -f {{ artifact_path }} async: 3600 poll: 10 diff --git a/ansible/roles/es-azure-snapshot/defaults/main.yml b/ansible/roles/es-azure-snapshot/defaults/main.yml index 39ca274c0e..82890b2813 100644 --- a/ansible/roles/es-azure-snapshot/defaults/main.yml +++ b/ansible/roles/es-azure-snapshot/defaults/main.yml @@ -10,12 +10,12 @@ snapshot_create_request_body: { es_snapshot_host: "localhost" snapshot_base_path: "default" -es_azure_backup_container_name: "elasticsearch-snapshots" +cloud_storage_esbackup_bucketname: "elasticsearch-snapshots" # This variable is added for the below reason - # 1. Introduce a common variable for various clouds. In case of azure, it refers to container name, in case of aws / gcp, it refers to folder name # 2. We want to avoid too many new variable introduction / replacement in first phase. Hence we will reuse the existing variable defined in private repo # or other default files and just assign the value to the newly introduced common variable # 3. 
After few releases, we will remove the older variables and use only the new variables across the repos -es_backup_storage: "{{ es_azure_backup_container_name }}" +es_backup_storage: "{{ cloud_storage_esbackup_bucketname }}" diff --git a/kubernetes/ansible/roles/druid/defaults/main.yml b/kubernetes/ansible/roles/druid/defaults/main.yml index 509b3d6dcf..98dd6c092f 100644 --- a/kubernetes/ansible/roles/druid/defaults/main.yml +++ b/kubernetes/ansible/roles/druid/defaults/main.yml @@ -128,9 +128,9 @@ druid_configs: druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" #Druid Azure Details druid_storage_type: "azure" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure druid_log_azure_container: "{{ druid_azure_container_name }}" druid_log_azure_folder: "druidlogs" @@ -206,9 +206,9 @@ druid_configs: druid_postgres_user: "druid@{{ postgres.db_url }}" #Druid Azure Details druid_postgres_pass: "{{ dp_vault_druid_postgress_pass }}" - azure_account_name: "{{ sunbird_druid_storage_account_name }}" - azure_storage_secret: "{{ sunbird_druid_storage_account_key }}" - azure_container: "{{ druid_azure_container_name }}" + azure_account_name: "{{ cloud_private_storage_accountname }}" + azure_storage_secret: "{{ cloud_private_storage_secret }}" + azure_container: "{{ cloud_storage_telemetry_bucketname }}" #Logging the indexing logs to azure druid_log_azure_container: "{{ druid_azure_container_name }}" druid_log_azure_folder: "druidlogs" @@ -266,4 +266,4 @@ druid_configs: druid_router_http_numMaxThreads: 100 druid_server_http_numThreads: 100 druid_router_managementProxy_enabled: true - druid_router_replicas: 1 \ No newline at end 
of file + druid_router_replicas: 1 diff --git a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml index 43b7aac364..17bd0257bb 100644 --- a/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml +++ b/kubernetes/ansible/roles/flink-jobs-deploy/defaults/main.yml @@ -32,7 +32,7 @@ taskmanager_heap_memory: 1024 postgres_max_connections: 2 azure_account: "{{ cloud_private_storage_accountname }}" azure_secret: "{{ cloud_private_storage_secret }}" -flink_container_name: dev-data-store +cloud_storage_flink_bucketname: dev-data-store flink_dp_storage_container: "" checkpoint_store_type: gcloud checkpoint_interval: 60000 From 7517485f580ee25dc3b447a4aa6eafdbb9fe5490 Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Wed, 17 May 2023 18:05:54 +0530 Subject: [PATCH 23/31] LR-539 | Encryption key length update added --- ansible/roles/postgres-db-update/tasks/main.yml | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index 631299b253..86d27bb3de 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -109,7 +109,7 @@ - name: update {{ env }}_job_request shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ADD COLUMN IF NOT EXISTS processed_batches text" run_once: true - tags: update + tags: update - name: create {{ env }}_experiment_definition shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "CREATE TABLE IF NOT EXISTS {{ env }}_experiment_definition (exp_id VARCHAR(50), created_by VARCHAR(50), created_on TIMESTAMP, criteria VARCHAR(100), exp_data 
VARCHAR(300), exp_description VARCHAR(200), exp_name VARCHAR(50), stats VARCHAR(300), status VARCHAR(50), status_message VARCHAR(50), updated_by VARCHAR(50), updated_on TIMESTAMP, PRIMARY KEY(exp_id))" @@ -164,4 +164,9 @@ - name: update report shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE report ALTER COLUMN type TYPE varchar(10)" run_once: true - tags: update \ No newline at end of file + tags: update + +- name: update encryption_key in {{ env }}_job_request + shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" + run_once: true + tags: update From 0cde6177f3f3e080e6033c6a9fa7fc7b2558e79f Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Wed, 17 May 2023 18:07:45 +0530 Subject: [PATCH 24/31] LR-539 | Encryption key length update added --- ansible/roles/postgres-db-update/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index 86d27bb3de..c35fe3c767 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -109,7 +109,7 @@ - name: update {{ env }}_job_request shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ADD COLUMN IF NOT EXISTS processed_batches text" run_once: true - tags: update + tags: update - name: create {{ env }}_experiment_definition shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port 
}}" -c "CREATE TABLE IF NOT EXISTS {{ env }}_experiment_definition (exp_id VARCHAR(50), created_by VARCHAR(50), created_on TIMESTAMP, criteria VARCHAR(100), exp_data VARCHAR(300), exp_description VARCHAR(200), exp_name VARCHAR(50), stats VARCHAR(300), status VARCHAR(50), status_message VARCHAR(50), updated_by VARCHAR(50), updated_on TIMESTAMP, PRIMARY KEY(exp_id))" From 1268b302cfce93795c0004d7f63f610d2c11529f Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Thu, 14 Sep 2023 10:59:30 +0530 Subject: [PATCH 25/31] LR-539 | Encryption key length update added --- ansible/roles/postgres-db-update/tasks/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/postgres-db-update/tasks/main.yml b/ansible/roles/postgres-db-update/tasks/main.yml index c35fe3c767..e458f09ed8 100644 --- a/ansible/roles/postgres-db-update/tasks/main.yml +++ b/ansible/roles/postgres-db-update/tasks/main.yml @@ -167,6 +167,6 @@ tags: update - name: update encryption_key in {{ env }}_job_request - shell: PGPASSWORD="{{ postgres.db_password }}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" + shell: PGPASSWORD="{{postgres.db_password}}" psql -U "{{ postgres.db_username }}" -d "{{ postgres.db_name }}" -h "{{ postgres.db_url }}" -p "{{ postgres.db_port }}" -c "ALTER TABLE {{ env }}_job_request ALTER encryption_key TYPE varchar(500)" run_once: true tags: update From c3c8870764c5b20e9225c864d274a28279d21162 Mon Sep 17 00:00:00 2001 From: kumarks1122 Date: Thu, 14 Sep 2023 11:21:47 +0530 Subject: [PATCH 26/31] LR-539 | Testcase fixes added --- .../sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala 
b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala index 171f7e4f71..d7949224af 100644 --- a/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala +++ b/data-pipeline-flink/assessment-aggregator/src/test/scala/org/sunbird/dp/spec/AssessmentAggregatorTaskTestSpec.scala @@ -159,9 +159,9 @@ class AssessmentAggregatorTaskTestSpec extends BaseTestSpec { when(mockKafkaUtil.kafkaStringSink(forceValidationAssessmentConfig.kafkaCertIssueTopic)).thenReturn(new certificateIssuedEventsSink) val task = new AssessmentAggregatorStreamTask(forceValidationAssessmentConfig, mockKafkaUtil) task.process() - BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.batchSuccessCount}").getValue() should be(3) + BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.batchSuccessCount}").getValue() should be(1) BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.cacheHitCount}").getValue() should be(5) - BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.apiHitSuccessCount}").getValue() should be(2) + BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.apiHitSuccessCount}").getValue() should be(0) BaseMetricsReporter.gaugeMetrics(s"${assessmentConfig.jobName}.${assessmentConfig.ignoredEventsCount}").getValue() should be(1) } From 4518d6153d455294356d0925e71c024ded27ba2d Mon Sep 17 00:00:00 2001 From: santhosh-tg Date: Thu, 28 Sep 2023 20:56:12 +0530 Subject: [PATCH 27/31] Fix analytics user dir permission issue --- ansible/roles/analytics-bootstrap-always/tasks/main.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/ansible/roles/analytics-bootstrap-always/tasks/main.yml b/ansible/roles/analytics-bootstrap-always/tasks/main.yml index 065a3b3afb..da30d5e5da 100644 --- 
a/ansible/roles/analytics-bootstrap-always/tasks/main.yml +++ b/ansible/roles/analytics-bootstrap-always/tasks/main.yml @@ -17,6 +17,10 @@ createhome: yes group: "{{ analytics_group }}" +- name: change permission + become: yes + command: chown -R "{{ analytics_user }}":"{{ analytics_user }}" /home/"{{ analytics_user }}" + - name: Installing packages become: yes From 94678a060664adb0aa6671070d241611604a467d Mon Sep 17 00:00:00 2001 From: santhosh-tg Date: Thu, 28 Sep 2023 20:56:44 +0530 Subject: [PATCH 28/31] Update zookeeper download url --- ansible/roles/zookeeper-upgrade/defaults/main.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ansible/roles/zookeeper-upgrade/defaults/main.yml b/ansible/roles/zookeeper-upgrade/defaults/main.yml index 6425d3340f..b8d6d3fd99 100644 --- a/ansible/roles/zookeeper-upgrade/defaults/main.yml +++ b/ansible/roles/zookeeper-upgrade/defaults/main.yml @@ -1,6 +1,6 @@ --- zookeeper_version: 3.6.3 -zookeeper_url: "https://downloads.apache.org/zookeeper/stable/apache-zookeeper-{{ zookeeper_version }}-bin.tar.gz" +zookeeper_url: "https://archive.apache.org/dist/zookeeper/zookeeper-{{ zookeeper_version }}/apache-zookeeper-{{ zookeeper_version }}-bin.tar.gz" zookeeper_port: 2181 zk_jvm_opts: "-Xms128m -Xmx128m" verify: True From 910635eb109329de6d682e74e9f46556a8aa6328 Mon Sep 17 00:00:00 2001 From: santhosh-tg Date: Fri, 29 Sep 2023 14:09:51 +0530 Subject: [PATCH 29/31] Fix docker creds secrets issue when having specialcharacters --- kubernetes/ansible/bootstrap_minimal.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kubernetes/ansible/bootstrap_minimal.yaml b/kubernetes/ansible/bootstrap_minimal.yaml index f9facd6152..ae42a2b35e 100644 --- a/kubernetes/ansible/bootstrap_minimal.yaml +++ b/kubernetes/ansible/bootstrap_minimal.yaml @@ -33,7 +33,7 @@ ignore_errors: yes - name: Creating docker secrets - shell: "kubectl create secret docker-registry {{ imagepullsecrets }} --namespace {{ item }} 
--docker-server {{ core_vault_docker_registry_url }} --docker-username {{ core_vault_docker_registry_user }} --docker-password {{ core_vault_docker_registry_password }} --dry-run=client -o=yaml | kubectl apply -f -" + shell: "kubectl create secret docker-registry {{ imagepullsecrets }} --namespace {{ item }} --docker-server {{ core_vault_docker_registry_url }} --docker-username {{ core_vault_docker_registry_user }} --docker-password '{{ core_vault_docker_registry_password }}' --dry-run=client -o=yaml | kubectl apply -f -" with_items: - "flink-{{ env }}" - "flink-kp-{{ env }}" From 141c51f1ecb6ccf0cd28a7f55a0b40f4250d1ea5 Mon Sep 17 00:00:00 2001 From: santhosh-tg Date: Fri, 29 Sep 2023 16:30:45 +0530 Subject: [PATCH 30/31] Fix error - no matches for kind RoleBinding in version rbac.authorization.k8s.io/v1beta1 --- kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml | 2 +- .../helm_charts/bootstrap/reloader/templates/rolebinding.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml index 5827f5cdcb..cbd5979451 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/role.yaml @@ -1,5 +1,5 @@ {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }} -apiVersion: rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1 kind: Role metadata: labels: diff --git a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml index 94fb1f838b..08868f0e06 100755 --- a/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml +++ b/kubernetes/helm_charts/bootstrap/reloader/templates/rolebinding.yaml @@ -1,5 +1,5 @@ {{- if and (not (.Values.reloader.watchGlobally)) (.Values.reloader.rbac.enabled) }} -apiVersion: 
rbac.authorization.k8s.io/v1beta1 +apiVersion: rbac.authorization.k8s.io/v1 kind: RoleBinding metadata: labels: From b08aa1f9a60fac57117a23fb8dc8700076f597aa Mon Sep 17 00:00:00 2001 From: SadanandGowda Date: Mon, 6 Nov 2023 17:07:04 +0530 Subject: [PATCH 31/31] Updated cloud service provider changes --- ansible/inventory/env/group_vars/all.yml | 143 ++++++++++-------- .../tasks/main.yml | 25 +-- .../defaults/main.yml | 15 +- 3 files changed, 100 insertions(+), 83 deletions(-) diff --git a/ansible/inventory/env/group_vars/all.yml b/ansible/inventory/env/group_vars/all.yml index c7d0c3ee8c..d11ccd88f0 100644 --- a/ansible/inventory/env/group_vars/all.yml +++ b/ansible/inventory/env/group_vars/all.yml @@ -7,7 +7,7 @@ log4j_appender_kafka_topic: "{{env}}.telemetry.backend" # What's the implication # Is it azure blob or s3 bucket?? cloud_storage_telemetry_bucketname: dev-data-store -secrets_path: '{{inventory_dir}}/secrets.yml' +secrets_path: "{{inventory_dir}}/secrets.yml" cloud_storage_artifacts_bucketname: "{{dp_vault_artifacts_container}}" report_azure_account_name: "{{cloud_private_storage_accountname}}" @@ -34,7 +34,7 @@ telemetry_schema_directory: /etc/{{env}}/telemetry telemetry_schema_path: /etc/{{env}}/telemetry/schemas schema_repo_url: https://github.com/project-sunbird/sunbird-data-pipeline.git # Create learningall group with LP ips -cassandra_host: "{{ groups['cassandra'][0] }}" +cassandra_host: "{{ groups['cassandra'][0] }}" core_cassandra_host: "{{ groups['core-cassandra'][0] }}" lp_cassandra_host: "{{ groups['lp-cassandra'][0] }}" report_cassandra_host: "{{ groups['report-cassandra'][0] }}" @@ -46,36 +46,45 @@ sbin_path: "{{ analytics_user_home }}/sbin" # Secor vars secor: - properties: ['secor.azure', 'secor.common', 'secor', 'secor.partition', 'log4j'] - artifact_dir: /mount/secor - artifact_ver: "0.29" - azure: - account_name: "{{cloud_private_storage_accountname}}" - account_key: "{{cloud_private_storage_secret}}" - container_name: 
"{{cloud_storage_telemetry_bucketname}}" - paths: ['/mount/secor', '/mount/secor/reports', '/mount/secor/logs', '/home/analytics/sbin', '/mount/data/analytics'] - channel: "{{secor_alerts_slack_channel}}" + properties: + ["secor.azure", "secor.common", "secor", "secor.partition", "log4j"] + artifact_dir: /mount/secor + artifact_ver: "0.29" + azure: + account_name: "{{cloud_private_storage_accountname}}" + account_key: "{{cloud_private_storage_secret}}" + container_name: "{{cloud_storage_telemetry_bucketname}}" + + paths: + [ + "/mount/secor", + "/mount/secor/reports", + "/mount/secor/logs", + "/home/analytics/sbin", + "/mount/data/analytics", + ] + channel: "{{secor_alerts_slack_channel}}" # postgres # list of databases to be created # Can move this dictionary to postgres role; but incase we want to generalize roles!! postgresql_databases: - - name: analytics - owner: analytics + - name: analytics + owner: analytics postgresql_users: - - name: analytics - password: "{{dp_vault_pgdb_password}}" + - name: analytics + password: "{{dp_vault_pgdb_password}}" postgres: - db_url: "{{ groups['postgres'][0] }}" - db_username: analytics - db_name: analytics - db_password: "{{dp_vault_pgdb_password}}" - db_table_name: "{{env}}_consumer_channel_mapping" - db_port: 5432 - db_admin_user: analytics - db_admin_password: "{{dp_vault_pgdb_admin_password}}" + db_url: "{{ groups['postgres'][0] }}" + db_username: analytics + db_name: analytics + db_password: "{{dp_vault_pgdb_password}}" + db_table_name: "{{env}}_consumer_channel_mapping" + db_port: 5432 + db_admin_user: analytics + db_admin_password: "{{dp_vault_pgdb_admin_password}}" postgres_address_space: 0.0.0.0/0 # Postgres trust address space @@ -84,9 +93,9 @@ lp_composite_search_host: "{{ groups['composite-search-cluster'][0] }}" lp_search: "http://{{private_ingressgateway_ip}}/search" lp_url: http://{{ groups['learning'][0] }}:8080/learning-service service: - search: - url: http://{{private_ingressgateway_ip}}/search - path: 
/v3/search + search: + url: http://{{private_ingressgateway_ip}}/search + path: /v3/search cassandra_hierarchy_store_prefix: "{{env}}_" data_exhaust_token: "{{dp_vault_data_exhaust_token}}" @@ -106,46 +115,46 @@ CONTAINER_NAME_SAMZA: samza-logs script_path: /usr/local/hadoop job_names: - DeDuplication_1: - job_file_name: 'de-duplication' - DeNormalization_1: - job_file_name: 'de-normalization' - DruidEventsValidator_1: - job_file_name: 'druid-events-validator' - EventsRouter_1: - job_file_name: 'events-router' - TelemetryExtractor_1: - job_file_name: 'telemetry-extractor' - TelemetryLocationUpdater_1: - job_file_name: 'telemetry-location-updater' - TelemetryRouter_1: - job_file_name: 'telemetry-router' - TelemetryRedacter_1: - job_file_name: 'telemetry-redacter' - TelemetryValidator_1: - job_file_name: 'telemetry-validator' - DeviceProfileUpdater_1: - job_file_name: 'device-profile-updater' - AssessmentAggregator_1: - job_file_name: 'assessment-aggregator' - DerivedDeDuplication_1: - job_file_name: 'derived-de-duplication' - UserCacheUpdater_1: - job_file_name: 'user-cache-updater' - ContentCacheUpdater_1: - job_file_name: 'content-cache-updater' - ShareEventsFlattener_1: - job_file_name: 'share-events-flattener' + DeDuplication_1: + job_file_name: "de-duplication" + DeNormalization_1: + job_file_name: "de-normalization" + DruidEventsValidator_1: + job_file_name: "druid-events-validator" + EventsRouter_1: + job_file_name: "events-router" + TelemetryExtractor_1: + job_file_name: "telemetry-extractor" + TelemetryLocationUpdater_1: + job_file_name: "telemetry-location-updater" + TelemetryRouter_1: + job_file_name: "telemetry-router" + TelemetryRedacter_1: + job_file_name: "telemetry-redacter" + TelemetryValidator_1: + job_file_name: "telemetry-validator" + DeviceProfileUpdater_1: + job_file_name: "device-profile-updater" + AssessmentAggregator_1: + job_file_name: "assessment-aggregator" + DerivedDeDuplication_1: + job_file_name: "derived-de-duplication" + 
UserCacheUpdater_1: + job_file_name: "user-cache-updater" + ContentCacheUpdater_1: + job_file_name: "content-cache-updater" + ShareEventsFlattener_1: + job_file_name: "share-events-flattener" druid_ingestion_specs: - telemetry-events: - ingestion_file_name: 'telemetry_index_kafka' - summary-events: - ingestion_file_name: 'summary_index_kafka' - telemtry-feedback-events: - ingestion_file_name: 'telemetry_feedback_index_kafka' - pipeline-metrics: - ingestion_file_name: 'pipeline_metrics_index_kafka' + telemetry-events: + ingestion_file_name: "telemetry_index_kafka" + summary-events: + ingestion_file_name: "summary_index_kafka" + telemtry-feedback-events: + ingestion_file_name: "telemetry_feedback_index_kafka" + pipeline-metrics: + ingestion_file_name: "pipeline_metrics_index_kafka" #Druid Proxy APi service sunbird_druid_broker_host: "http://{{ groups['raw-broker'][0] }}" @@ -154,9 +163,9 @@ sunbird_learner_service_url: "http://{{private_ingressgateway_ip}}/learner" location_search_url: "{{ domain_name }}/api/data/" location_search_token: "Bearer {{ sunbird_api_auth_token }}" -druid_report_url_endpoint : "{{ proto}}://{{domain_name}}/api/data/v1/report/jobs" -druid_report_url : "{{ proto}}://{{domain_name}}/api/data/v1/" -druid_report_token : "Bearer {{ sunbird_api_auth_token }}" +druid_report_url_endpoint: "{{ proto}}://{{domain_name}}/api/data/v1/report/jobs" +druid_report_url: "{{ proto}}://{{domain_name}}/api/data/v1/" +druid_report_token: "Bearer {{ sunbird_api_auth_token }}" #redis multiprocess config content_port: 6379 diff --git a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml index bc94c74c42..913837c651 100644 --- a/ansible/roles/provision-azure-spark-cluster/tasks/main.yml +++ b/ansible/roles/provision-azure-spark-cluster/tasks/main.yml @@ -1,11 +1,18 @@ - name: Adding azure blob variable to spark env file lineinfile: path: "{{spark_folder}}/conf/spark-env.sh" - line: 
'{{item.var}}={{item.value}}' + line: "{{item.var}}={{item.value}}" regexp: "{{ item.var }}.*" with_items: - - {var: 'azure_storage_key', value: '{{ cloud_private_storage_accountname }}'} - - {var: 'azure_storage_secret', value: '{{ cloud_private_storage_secret }}'} + - { + var: "azure_storage_key", + value: "{{ cloud_private_storage_accountname }}", + } + - { + var: "azure_storage_secret", + value: "{{ cloud_private_storage_secret }}", + } + no_log: true when: cloud_service_provider == "azure" @@ -15,18 +22,18 @@ path: "{{ spark_folder }}/jars/{{item.var}}-{{item.value}}.jar" state: absent with_items: - - {var: 'guava', value: '{{ guava_default_jre_version }}'} - - {var: 'guice', value: '{{ guice_default_version }}'} - - {var: 'guice-servlet', value: '{{ guice_default_version }}'} - + - { var: "guava", value: "{{ guava_default_jre_version }}" } + - { var: "guice", value: "{{ guice_default_version }}" } + - { var: "guice-servlet", value: "{{ guice_default_version }}" } + - name: Download guava_jre_url and copy to Spark jars folder become: yes get_url: url={{ guava_jre_url }} dest={{ spark_folder }}/jars/guava-{{guava_jre_version}}.jar timeout=1000 force=no - + - name: Download log4j api and copy to Spark jars folder become: yes get_url: url={{ log4j_api_url }} dest={{ spark_folder }}/jars/log4j-api-{{log4j_version}}.jar timeout=1000 force=no - + - name: Download log4j core and copy to Spark jars folder become: yes get_url: url={{ log4j_core_url }} dest={{ spark_folder }}/jars/log4j-core-{{log4j_version}}.jar timeout=1000 force=no diff --git a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml index f31781473e..6f85c930d5 100644 --- a/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml +++ b/ansible/roles/secor-telemetry-backup-deploy/defaults/main.yml @@ -8,7 +8,8 @@ azure_account_key: "{{cloud_private_storage_secret}}" telemetry_ingestion_topic: "{{ env }}.telemetry.ingest" secor: 
- properties: ['secor.azure', 'secor.common', 'secor', 'secor.partition', 'log4j'] + properties: + ["secor.azure", "secor.common", "secor", "secor.partition", "log4j"] artifact_dir: /mount/secor secor_service_name: @@ -81,7 +82,7 @@ secor_service_name: max_file_age: "14400" partition_prefix_enabled: "true" partition_prefix_key: "eid" - partition_prefix_mapping: "{\"ME_SESSION_SUMMARY\":\"summary\",\"ME_WORKFLOW_SUMMARY\":\"summary\",\"ME_DEIVCE_SUMMARY\":\"summary\",\"ME_ITEM_SUMMARY\":\"summary\",\"ME_WORKFLOW_USAGE_SUMMARY\":\"summary\",\"ME_ITEM_USAGE_SUMMARY\":\"summary\",\"ME_USAGE_SUMMARY\":\"summary\",\"ME_DIALCODE_USAGE_SUMMARY\":\"summary\",\"DEFAULT\":\"raw\"}" + partition_prefix_mapping: '{"ME_SESSION_SUMMARY":"summary","ME_WORKFLOW_SUMMARY":"summary","ME_DEIVCE_SUMMARY":"summary","ME_ITEM_SUMMARY":"summary","ME_WORKFLOW_USAGE_SUMMARY":"summary","ME_ITEM_USAGE_SUMMARY":"summary","ME_USAGE_SUMMARY":"summary","ME_DIALCODE_USAGE_SUMMARY":"summary","DEFAULT":"raw"}' output_file_pattern: "{partition}-{kafkaPartition}-{currentTimestamp}.json" service_description: "Denormalized Events Backup" service_monitor_schedule_hr: "23" @@ -106,7 +107,7 @@ secor_service_name: service_monitor_schedule_hr: "23" service_monitor_schedule_min: "50" message_channel_identifier: "" - message_parser: "com.pinterest.secor.parser.PatternDateMessageParser" + message_parser: "com.pinterest.secor.parser.PatternDateMessageParser" channel-telemetry-backup: consumer_group: "{{ env }}.telemetry.channel.backup" base_path: "data-exhaust/raw" @@ -144,7 +145,7 @@ secor_service_name: service_monitor_schedule_hr: "23" service_monitor_schedule_min: "50" message_channel_identifier: "derivedlocationdata.state" - message_parser: "com.pinterest.secor.parser.ChannelDateMessageParser" + message_parser: "com.pinterest.secor.parser.ChannelDateMessageParser" extractor-failed-backup: consumer_group: "{{ env }}.extractor.failed.backup" base_path: "extractor-failed" @@ -252,7 +253,7 @@ secor_service_name: 
max_file_age: "600" partition_prefix_enabled: "true" partition_prefix_key: "eid" - partition_prefix_mapping: "{\"ME_SESSION_SUMMARY\":\"session_summary\",\"ME_WORKFLOW_SUMMARY\":\"workflow_summary\",\"ME_DEIVCE_SUMMARY\":\"device_summary\",\"ME_ITEM_SUMMARY\":\"item_summary\",\"ME_WORKFLOW_USAGE_SUMMARY\":\"workflow_usage_summary\",\"ME_ITEM_USAGE_SUMMARY\":\"item_usage_summary\",\"ME_USAGE_SUMMARY\":\"usage_summary\",\"ME_DIALCODE_USAGE_SUMMARY\":\"dialcode_usage_summary\",\"DEFAULT\":\"me\"}" + partition_prefix_mapping: '{"ME_SESSION_SUMMARY":"session_summary","ME_WORKFLOW_SUMMARY":"workflow_summary","ME_DEIVCE_SUMMARY":"device_summary","ME_ITEM_SUMMARY":"item_summary","ME_WORKFLOW_USAGE_SUMMARY":"workflow_usage_summary","ME_ITEM_USAGE_SUMMARY":"item_usage_summary","ME_USAGE_SUMMARY":"usage_summary","ME_DIALCODE_USAGE_SUMMARY":"dialcode_usage_summary","DEFAULT":"me"}' output_file_pattern: "{partition}-{currentTimestamp}.json" service_description: "Derived Telemetry Backup" service_monitor_schedule_hr: "23" @@ -312,7 +313,7 @@ secor_service_name: max_file_age: "3600" partition_prefix_enabled: "true" partition_prefix_key: "jobName" - partition_prefix_mapping: "{\"publish-pipeline\":\"publish_pipeline\",\"composite-search-indexer\":\"cs_index\",\"DEFAULT\":\"failed_events\"}" + partition_prefix_mapping: '{"publish-pipeline":"publish_pipeline","composite-search-indexer":"cs_index","DEFAULT":"failed_events"}' output_file_pattern: "{partition}-{currentTimestamp}.json" service_description: "Learning Failed Backup" service_monitor_schedule_hr: "23" @@ -413,4 +414,4 @@ secor_service_name: service_monitor_schedule_hr: "23" service_monitor_schedule_min: "57" message_channel_identifier: "" - message_parser: "com.pinterest.secor.parser.PatternDateMessageParser" \ No newline at end of file + message_parser: "com.pinterest.secor.parser.PatternDateMessageParser"