Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

refactor lakehouse and update images for services #178

Merged
merged 1 commit into from
Sep 30, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
**/target
*.iml
.idea
.classpath
*.DS_Store
*.terrahelpbkp
**/.terraform/*
automation-scripts/infra-setup/awscliv2.zip
automation-scripts/infra-setup/aws/dist
automation-scripts/infra-setup/aws/install
automation-scripts/infra-setup/aws/README.md
automation-scripts/infra-setup/aws/THIRD_PARTY_LICENSES

# .tfstate files
*.tfstate
*.tfstate.*
!overrides.tfvars
1 change: 0 additions & 1 deletion helmcharts/.gitignore

This file was deleted.

2 changes: 1 addition & 1 deletion helmcharts/global-cloud-values-aws.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ lakehouse-connector:
fs.s3a.access.key: *s3-access-key
fs.s3a.secret.key: *s3-secret-access-key
serviceAccount:
create: *create_sa
create: false
name: flink-sa
annotations:
<<: *flink_sa_annotation
Expand Down
12 changes: 6 additions & 6 deletions helmcharts/images.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,23 +30,23 @@ images: &images

dataset-api: &dataset-api # registry: ""
repository: obsrv-api-service
tag: "1.1-RC"
tag: "1.1.0-RC"
digest: ""

web-console: &web-console # registry: ""
repository: "sb-obsrv-web-console"
tag: "1.1-RC"
repository: "obsrv-web-console"
tag: "1.1.0-RC"
digest: ""

## PIPELINE ENTERPRISE
unified-pipeline: &unified-pipeline # registry: ""
repository: unified-pipeline
tag: "1.1-RC"
tag: "1.1.0-RC"
digest: ""

cache-indexer: &cache-indexer # registry: ""
repository: cache-indexer
tag: "1.1-RC"
tag: "1.1.0-RC"
digest: ""

## PIPELINE ENTERPRISE - INDIVIDUAL COMPONENTS
Expand Down Expand Up @@ -537,4 +537,4 @@ internal: &internal

## Sourcing internal as root element,
## should the need arise
<<: *internal
<<: *internal
99 changes: 37 additions & 62 deletions helmcharts/services/lakehouse-connector/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,11 +103,44 @@ spec:
path: core-site.xml
name: {{ .Chart.Name }}-config
name: flink-config-volume

containers:
- name: {{ .Chart.Name }}-jobmanager # Main container to start job-manager
image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
# image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}"
imagePullPolicy: Always
resources:
requests:
cpu: "{{ .Values.jobmanager.cpu_requests }}"
memory: "{{ .Values.jobmanager.memory_requests }}"
limits:
cpu: "{{ .Values.jobmanager.cpu_limits }}"
memory: "{{ .Values.jobmanager.memory_limits }}"
workingDir: /opt/flink
command: ["/opt/flink/bin/standalone-job.sh"]
args: ["start-foreground",
{{- if eq .Values.global.cloud_storage_provider "azure" }}
- -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
{{- end }}
# {{- if eq .Values.global.cloud_storage_provider "aws" }}
# - -Dfs.s3a.access.key={{ .Values.global.s3_access_key }}
# - -Dfs.s3a.secret.key={{ .Values.global.s3_secret_key }}
# {{- end }}
{{- if eq .Values.global.cloud_storage_provider "gcs" }}
"-Dgoogle.cloud.auth.service.account.enable=true",
{{- end }}
{{- $release_name := .Chart.Name }}
"--job-classname={{ (index .Values $release_name).job_classname }}",
"-Dweb.submit.enable=false",
"-Dmetrics.reporter.prom.class=org.apache.flink.metrics.prometheus.PrometheusReporter",
"-Dmetrics.reporter.prom.port={{ .Values.jobmanager.prom_port }}",
"-Djobmanager.rpc.address={{ .Chart.Name }}-jobmanager",
"-Djobmanager.rpc.port={{ .Values.jobmanager.rpc_port }}",
"-Dparallelism.default=1",
"-Dblob.server.port={{ .Values.jobmanager.blob_port }}",
"-Dqueryable-state.server.ports={{ .Values.jobmanager.query_port }}",
"--config.file.path",
"/data/flink/conf/{{ .Chart.Name }}.conf"]
ports:
- containerPort: 6123
name: rpc
Expand All @@ -117,17 +150,9 @@ spec:
name: query
- containerPort: 8081
name: ui
resources:
requests:
cpu: "{{ .Values.jobmanager.cpu_requests }}"
memory: "{{ .Values.jobmanager.memory_requests }}"
limits:
cpu: "{{ .Values.jobmanager.cpu_limits }}"
memory: "{{ .Values.jobmanager.memory_limits }}"
volumeMounts:
- name: flink-config-volume
mountPath: /opt/flink/conf/log4j-console.properties
subPath: log4j-console.properties
env:
- name: HADOOP_CONF_DIR
value: "/opt/hadoop/etc/hadoop"
volumeMounts:
- mountPath: /opt/flink/conf/flink-conf.yaml
name: flink-config-volume
Expand All @@ -143,57 +168,7 @@ spec:
subPath: log4j-console.properties
- name: flink-config-volume
mountPath: /opt/hadoop/etc/hadoop/core-site.xml
subPath: core-site.xml
workingDir: /opt/flink
args: ["jobmanager"]
env:
- name: HADOOP_CONF_DIR
value: "/opt/hadoop/etc/hadoop"
- name: FLINK_PROPERTIES
value: |+
jobmanager.rpc.address: {{ .Chart.Name }}-jobmanager
jobmanager.rpc.port=6123
metrics.reporters: prom
metrics.reporter.prom.factory.class: org.apache.flink.metrics.prometheus.PrometheusReporterFactory
metrics.reporter.prom.host: {{ .Chart.Name }}-jobmanager
metrics.reporter.prom.port: 9250
- name: {{ .Chart.Name }}-job-submit # side car to submit the hudi connector
image: "{{ .Values.image.registry }}/{{ .Values.image.repository }}:{{ .Values.image.tag }}"
imagePullPolicy: Always

command:
- /opt/flink/bin/flink
- run
- -m
- {{ .Chart.Name }}-jobmanager.flink.svc.cluster.local:8081
- /opt/flink/custom-lib/hudi-connector-1.0.0.jar
- --config.file.path
- /data/flink/conf/{{ .Chart.Name }}.conf
{{- if eq .Values.global.cloud_storage_provider "azure" }}
- -Dfs.azure.account.key.{{ .Values.global.azure_storage_account_name }}.blob.core.windows.net={{ .Values.global.azure_storage_account_key }}
{{- end }}
{{- if eq .Values.global.cloud_storage_provider "aws" }}
- -Dfs.s3a.access.key={{ .Values.global.s3_access_key }}
- -Dfs.s3a.secret.key={{ .Values.global.s3_secret_key }}
{{- end }}
volumeMounts:
- mountPath: /data/flink/conf/baseconfig.conf
name: flink-config-volume
subPath: base-config.conf
- mountPath: /data/flink/conf/{{ .Chart.Name }}.conf
name: flink-config-volume
subPath: {{ .Chart.Name }}.conf
workingDir: /opt/flink
volumes:
- configMap:
items:
- key: base-config
path: base-config.conf
- key: {{ .Chart.Name }}
path: {{ .Chart.Name }}.conf
name: {{ .Chart.Name }}-config
name: flink-config-volume

subPath: core-site.xml
---
apiVersion: apps/v1
kind: Deployment
Expand Down
33 changes: 18 additions & 15 deletions helmcharts/services/lakehouse-connector/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ imagepullsecrets: ""
image:
registry: sanketikahub
repository: lakehouse-connector
tag: 1.0.3
tag: 1.0.8
serviceMonitor:
enabled: false
replicaCount: 1
Expand All @@ -15,8 +15,8 @@ jobmanager:
ui_port: 8081
prom_port: 9250
heap_memory: 1024
cpu_requests: 0.25
cpu_limits: 0.25
cpu_requests: 1
cpu_limits: 1
memory_requests: 1024Mi
memory_limits: 1024Mi

Expand All @@ -33,7 +33,7 @@ taskmanager:
cpu_requests: 1
cpu_limits: 1
memory_requests: 1024Mi
memory_limits: 1024Mi
memory_limits: 2300Mi

checkpoint_store_type: "s3"

Expand Down Expand Up @@ -156,8 +156,8 @@ base_config: |
task {
parallelism = 1
consumer.parallelism = 1
checkpointing.interval = 30000
checkpointing.pause.between.seconds = 5000
checkpointing.interval = 120000
checkpointing.pause.between.seconds = 120000
restart-strategy.attempts = 3
restart-strategy.delay = 30000 # in milliseconds
}
Expand Down Expand Up @@ -229,23 +229,25 @@ lakehouse-connector:
base.path = "{{ .Values.global.hudi_metadata_bucket }}"
}
write {
tasks = 2
task.max.memory = 256
compaction.max.memory = 100
tasks = 1
task.max.memory = 512
compaction.max.memory = 512
}
metadata.enabled = true
compaction.enabled = true
delta.commits = 5
metadata.delta.commits = 5
write.tasks = 2
write.batch.size = 16
compaction.tasks = 2
write.batch.size = 256
compaction.tasks = 1
index.type = "BLOOM"
delta.commits = 2
delta.seconds = 10
write.batch.size = 256
compaction.tasks = 2
index.type = "BLOOM"
delta.commits = 2
delta.seconds = 600
write.lock.provider = "org.apache.hudi.client.transaction.lock.InProcessLockProvider"
write.concurrency.mode = "optimistic_concurrency_control"
metadata.index.column.stats.enabled = true
compression.codec = "snappy"
{{- if eq .Values.global.cloud_storage_provider "azure" }}
fs.atomic_creation.support = "wasbs"
Expand All @@ -256,12 +258,13 @@ lakehouse-connector:
}
flink-conf: |+
jobmanager.memory.flink.size: 1024m
taskmanager.memory.flink.size: 1024m
taskmanager.memory.flink.size: 2048m
taskmanager.numberOfTaskSlots: 1
jobManager.numberOfTaskSlots: 1
parallelism.default: 1
jobmanager.execution.failover-strategy: region
taskmanager.memory.network.fraction: 0.1
taskmanager.memory.managed.fraction: 0.3
heartbeat.timeout: 8000
heartbeat.interval: 5000
state.savepoints.dir: file:///tmp
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ data:
{{- if .Values.coordinator.config.memory.heapHeadroomPerNode }}
memory.heap-headroom-per-node={{ .Values.coordinator.config.memory.heapHeadroomPerNode }}
{{- end }}
discovery.uri=http://localhost:{{ .Values.service.port }}
discovery.uri=http://0.0.0.0:{{ .Values.service.port }}
{{- if .Values.server.config.authenticationType }}
http-server.authentication.type={{ .Values.server.config.authenticationType }}
{{- end }}
Expand Down