From 035d1abf51ed185ae4b2e8a2399bf35343f3be32 Mon Sep 17 00:00:00 2001
From: divyagovindaiah
Date: Wed, 25 Sep 2024 14:34:00 +0530
Subject: [PATCH] lakehouse-refactoring and updated images

---
 .gitignore                                    | 17 ++++
 helmcharts/.gitignore                         |  1 -
 helmcharts/global-cloud-values-aws.yaml       |  2 +-
 helmcharts/images.yaml                        | 12 +--
 .../templates/deployment.yaml                 | 99 +++++++------------
 .../services/lakehouse-connector/values.yaml  | 33 ++++---
 .../templates/configmap-coordinator.yaml      |  2 +-
 7 files changed, 80 insertions(+), 86 deletions(-)
 create mode 100644 .gitignore
 delete mode 100644 helmcharts/.gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 00000000..e386cb33
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,17 @@
+**/target
+*.iml
+.idea
+.classpath
+*.DS_Store
+*.terrahelpbkp
+**/.terraform/*
+automation-scripts/infra-setup/awscliv2.zip
+automation-scripts/infra-setup/aws/dist
+automation-scripts/infra-setup/aws/install
+automation-scripts/infra-setup/aws/README.md
+automation-scripts/infra-setup/aws/THIRD_PARTY_LICENSES
+
+# .tfstate files
+*.tfstate
+*.tfstate.*
+!overrides.tfvars
diff --git a/helmcharts/.gitignore b/helmcharts/.gitignore
deleted file mode 100644
index ebc85a57..00000000
--- a/helmcharts/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-kitchen
diff --git a/helmcharts/global-cloud-values-aws.yaml b/helmcharts/global-cloud-values-aws.yaml
index 77c93bac..59660829 100644
--- a/helmcharts/global-cloud-values-aws.yaml
+++ b/helmcharts/global-cloud-values-aws.yaml
@@ -255,7 +255,7 @@ lakehouse-connector:
     fs.s3a.access.key: *s3-access-key
     fs.s3a.secret.key: *s3-secret-access-key
   serviceAccount:
-    create: *create_sa
+    create: false
     name: flink-sa
     annotations:
       <<: *flink_sa_annotation
diff --git a/helmcharts/images.yaml b/helmcharts/images.yaml
index ceafbf91..cef43509 100644
--- a/helmcharts/images.yaml
+++ b/helmcharts/images.yaml
@@ -30,23 +30,23 @@ images: &images
   dataset-api: &dataset-api
     # registry: ""
     repository: obsrv-api-service
-    tag: "1.1-RC"
+    tag: "1.1.0-RC"
     digest: ""
   web-console: &web-console
     # registry: ""
-    repository: "sb-obsrv-web-console"
-    tag: "1.1-RC"
+    repository: "obsrv-web-console"
+    tag: "1.1.0-RC"
     digest: ""
   ## PIPELINE ENTERPRISE
   unified-pipeline: &unified-pipeline
     # registry: ""
     repository: unified-pipeline
-    tag: "1.1-RC"
+    tag: "1.1.0-RC"
     digest: ""
   cache-indexer: &cache-indexer
     # registry: ""
     repository: cache-indexer
-    tag: "1.1-RC"
+    tag: "1.1.0-RC"
     digest: ""
 
   ## PIPELINE ENTERPRISE - INDIVIDUAL COMPONENTS
@@ -537,4 +537,4 @@ internal: &internal
 
 ## Sourcing internal as root element,
 ## should need arise
-<<: *internal
+<<: *internal
\ No newline at end of file
diff --git a/helmcharts/services/lakehouse-connector/templates/deployment.yaml b/helmcharts/services/lakehouse-connector/templates/deployment.yaml
index 3280423e..d3fef4ae 100644
--- a/helmcharts/services/lakehouse-connector/templates/deployment.yaml
+++ b/helmcharts/services/lakehouse-connector/templates/deployment.yaml
@@ -103,11 +103,44 @@ spec:
             path: core-site.xml
           name: {{ .Chart.Name }}-config
         name: flink-config-volume
+
       containers:
       - name: {{ .Chart.Name }}-jobmanager # Main container to start job-manager
         image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
         # image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
         imagePullPolicy: Always
+        resources:
+          requests:
+            cpu: "{{ .Values.jobmanager.cpu_requests }}"
+            memory: "{{ .Values.jobmanager.memory_requests }}"
+          limits:
+            cpu: "{{ .Values.jobmanager.cpu_limits }}"
+            memory: "{{ .Values.jobmanager.memory_limits }}"
+        workingDir: /opt/flink
+        command: ["/opt/flink/bin/standalone-job.sh"]
+        args: ["start-foreground",
+{{- if eq .Values.global.cloud_storage_provider "azure" }}
+- -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
+{{- end }}
+# {{- if eq .Values.global.cloud_storage_provider "aws" }}
+# - -Dfs.s3a.access.key={{ .Values.global.s3_access_key }}
+# - -Dfs.s3a.secret.key={{ .Values.global.s3_secret_key }}
+# {{- end }}
+{{- if eq .Values.global.cloud_storage_provider "gcs" }}
+          "-Dgoogle.cloud.auth.service.account.enable=true",
+{{- end }}
+{{- $release_name := .Chart.Name }}
+          "--job-classname={{ (index .Values $release_name).job_classname }}",
+          "-Dweb.submit.enable=false",
+          "-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter",
+          "-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}",
+          "-Djobmanager.rpc.address={{ .Chart.Name }}-jobmanager",
+          "-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}",
+          "-Dparallelism.default=1",
+          "-Dblob.server.port={{ .Values.jobmanager.blob_port }}",
+          "-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}",
+          "--config.file.path",
+          "/data/flink/conf/{{ .Chart.Name }}.conf"]
         ports:
         - containerPort: 6123
           name: rpc
@@ -117,17 +150,9 @@ spec:
           name: query
         - containerPort: 8081
           name: ui
-        resources:
-          requests:
-            cpu: "{{ .Values.jobmanager.cpu_requests }}"
-            memory: "{{ .Values.jobmanager.memory_requests }}"
-          limits:
-            cpu: "{{ .Values.jobmanager.cpu_limits }}"
-            memory: "{{ .Values.jobmanager.memory_limits }}"
-        volumeMounts:
-        - name: flink-config-volume
-          mountPath: /opt/flink/conf/log4j-console.properties
-          subPath: log4j-console.properties
+        env:
+        - name: HADOOP_CONF_DIR
+          value: "/opt/hadoop/etc/hadoop"
         volumeMounts:
         - mountPath: /opt/flink/conf/flink-conf.yaml
           name: flink-config-volume
@@ -143,57 +168,7 @@ spec:
           subPath: log4j-console.properties
        - name: flink-config-volume
          mountPath: /opt/hadoop/etc/hadoop/core-site.xml
-          subPath: core-site.xml
-        workingDir: /opt/flink
-        args: ["jobmanager"]
-        env:
-        - name: HADOOP_CONF_DIR
-          value: "/opt/hadoop/etc/hadoop"
-        - name: FLINK_PROPERTIES
-          value: |+
-            jobmanager.rpc.address: {{ .Chart.Name }}-jobmanager
-            jobmanager.rpc.port=6123
-            metrics.reporters: prom
-            metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
-            metrics.reporter.prom.host: {{ .Chart.Name }}-jobmanager
-            metrics.reporter.prom.port: 9250
-      - name: {{ .Chart.Name }}-job-submit # side car to submit the hudi connector
-        image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
-        imagePullPolicy: Always
-
-        command:
-          - /opt/flink/bin/flink
-          - run
-          - -m
-          - {{ .Chart.Name }}-jobmanager.flink.svc.cluster.local:8081
-          - /opt/flink/custom-lib/hudi-connector-1.0.0.jar
-          - --config.file.path
-          - /data/flink/conf/{{ .Chart.Name }}.conf
-          {{- if eq .Values.global.cloud_storage_provider "azure" }}
-          - -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
-          {{- end }}
-          {{- if eq .Values.global.cloud_storage_provider "aws" }}
-          - -Dfs.s3a.access.key={{ .Values.global.s3_access_key }}
-          - -Dfs.s3a.secret.key={{ .Values.global.s3_secret_key }}
-          {{- end }}
-        volumeMounts:
-          - mountPath: /data/flink/conf/baseconfig.conf
-            name: flink-config-volume
-            subPath: base-config.conf
-          - mountPath: /data/flink/conf/{{ .Chart.Name }}.conf
-            name: flink-config-volume
-            subPath: {{ .Chart.Name }}.conf
-        workingDir: /opt/flink
-      volumes:
-        - configMap:
-            items:
-              - key: base-config
-                path: base-config.conf
-              - key: {{ .Chart.Name }}
-                path: {{ .Chart.Name }}.conf
-            name: {{ .Chart.Name }}-config
-          name: flink-config-volume
-
+          subPath: core-site.xml
 ---
 apiVersion: apps/v1
 kind: Deployment
diff --git a/helmcharts/services/lakehouse-connector/values.yaml b/helmcharts/services/lakehouse-connector/values.yaml
index 8337b181..626a00e1 100644
--- a/helmcharts/services/lakehouse-connector/values.yaml
+++ b/helmcharts/services/lakehouse-connector/values.yaml
@@ -3,7 +3,7 @@ imagepullsecrets: ""
 image:
   registry: sanketikahub
   repository: lakehouse-connector
-  tag: 1.0.3
+  tag: 1.0.8
 serviceMonitor:
   enabled: false
 replicaCount: 1
@@ -15,8 +15,8 @@ jobmanager:
   ui_port: 8081
   prom_port: 9250
   heap_memory: 1024
-  cpu_requests: 0.25
-  cpu_limits: 0.25
+  cpu_requests: 1
+  cpu_limits: 1
   memory_requests: 1024Mi
   memory_limits: 1024Mi
 
@@ -33,7 +33,7 @@ taskmanager:
   cpu_requests: 1
   cpu_limits: 1
   memory_requests: 1024Mi
-  memory_limits: 1024Mi
+  memory_limits: 2300Mi
 
 checkpoint_store_type: "s3"
 
@@ -156,8 +156,8 @@ base_config: |
     task {
       parallelism = 1
      consumer.parallelism = 1
-      checkpointing.interval = 30000
-      checkpointing.pause.between.seconds = 5000
+      checkpointing.interval = 120000
+      checkpointing.pause.between.seconds = 120000
      restart-strategy.attempts = 3
      restart-strategy.delay = 30000 # in milli-seconds
    }
@@ -229,23 +229,25 @@ lakehouse-connector:
        base.path = "{{ .Values.global.hudi_metadata_bucket }}"
      }
      write {
-        tasks = 2
-        task.max.memory = 256
-        compaction.max.memory = 100
+        tasks = 1
+        task.max.memory = 512
+        compaction.max.memory = 512
      }
      metadata.enabled = true
      compaction.enabled = true
+      delta.commits = 5
+      metadata.delta.commits = 5
      write.tasks = 2
-      write.batch.size = 16
-      compaction.tasks = 2
+      write.batch.size = 256
+      compaction.tasks = 1
      index.type = "BLOOM"
      delta.commits = 2
      delta.seconds = 10
      write.batch.size = 256
      compaction.tasks = 2
-      index.type = "BLOOM"
-      delta.commits = 2
-      delta.seconds = 600
+      write.lock.provider = "org.apache.hudi.client.transaction.lock.InProcessLockProvider"
+      write.concurrency.mode = "optimistic_concurrency_control"
+      metadata.index.column.stats.enabled = true
      compression.codec = "snappy"
      {{- if eq .Values.global.cloud_storage_provider "azure" }}
      fs.atomic_creation.support = "wasbs"
@@ -256,12 +258,13 @@ lakehouse-connector:
    }
  flink-conf: |+
    jobmanager.memory.flink.size: 1024m
-    taskmanager.memory.flink.size: 1024m
+    taskmanager.memory.flink.size: 2048m
    taskmanager.numberOfTaskSlots: 1
    jobManager.numberOfTaskSlots: 1
    parallelism.default: 1
    jobmanager.execution.failover-strategy: region
    taskmanager.memory.network.fraction: 0.1
+    taskmanager.memory.managed.fraction: 0.3
    heartbeat.timeout: 8000
    heartbeat.interval: 5000
    state.savepoints.dir: file:///tmp
diff --git a/helmcharts/services/trino/templates/configmap-coordinator.yaml b/helmcharts/services/trino/templates/configmap-coordinator.yaml
index a4e5e946..9d9080f3 100644
--- a/helmcharts/services/trino/templates/configmap-coordinator.yaml
+++ b/helmcharts/services/trino/templates/configmap-coordinator.yaml
@@ -56,7 +56,7 @@ data:
     {{- if .Values.coordinator.config.memory.heapHeadroomPerNode }}
     memory.heap-headroom-per-node={{ .Values.coordinator.config.memory.heapHeadroomPerNode }}
     {{- end }}
-    discovery.uri=http://localhost:{{ .Values.service.port }}
+    discovery.uri=http://0.0.0.0:{{ .Values.service.port }}
    {{- if .Values.server.config.authenticationType }}
    http-server.authentication.type={{ .Values.server.config.authenticationType }}
    {{- end }}