Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: stable deployments for spartan #9147

Merged
merged 6 commits into from
Oct 10, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -67,13 +67,20 @@ http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }}
{{- end -}}

{{- define "aztec-network.otelCollectorMetricsEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/metrics
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/metrics
{{- end -}}
{{- end -}}
{{- end -}}

{{- define "aztec-network.otelCollectorTracesEndpoint" -}}
http://metrics-opentelemetry-collector.metrics:4318/v1/traces
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/traces
{{- end -}}
{{- end -}}
{{- end -}}



{{- define "helpers.flag" -}}
Expand Down
5 changes: 4 additions & 1 deletion spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -31,11 +31,13 @@ spec:
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
- name: deploy-contracts
image: {{ .Values.images.aztec.image }}
command:
Expand All @@ -56,10 +58,11 @@ spec:
- name: boot-node
image: {{ .Values.images.aztec.image }}
command:
# Sleep to give the DNS name time to become resolvable
[
"/bin/bash",
"-c",
"source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
"sleep 30 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer --pxe",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💤

]
livenessProbe:
exec:
Expand Down
2 changes: 1 addition & 1 deletion spartan/aztec-network/templates/l2-contracts.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ metadata:
data:
deploy-contracts.sh: |
#!/bin/sh
set -e
set -ex

# Run the deploy-l1-contracts command and capture the output
output=$(node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js deploy-l1-contracts --validators {{ join "," .Values.validator.validatorAddresses | quote }})
Expand Down
25 changes: 25 additions & 0 deletions spartan/aztec-network/templates/prover-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@ spec:
app: prover-node
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We are likely to change the node in the future, so the `grep anvil` check here should probably also be configurable.

echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
- name: configure-prover-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand All @@ -33,6 +57,7 @@ spec:
env:
- name: ETHEREUM_HOST
value: {{ include "aztec-network.ethereumHost" . | quote }}

containers:
- name: prover-node
image: "{{ .Values.images.aztec.image }}"
Expand Down
11 changes: 11 additions & 0 deletions spartan/aztec-network/templates/pxe.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,17 @@ spec:
{{- include "aztec-network.selectorLabels" . | nindent 8 }}
app: pxe
spec:
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done
containers:
- name: pxe
image: "{{ .Values.images.aztec.image }}"
Expand Down
27 changes: 26 additions & 1 deletion spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,31 @@ spec:
# We expect the validators to have already been added to the smart contract by this point - but this container still needs
# to be run in order to get the values
initContainers:
- name: wait-for-boot-node
image: {{ .Values.images.curl.image }}
command:
- /bin/sh
- -c
- |
until curl -s -X POST -H 'Content-Type: application/json' \
-d '{"jsonrpc":"2.0","method":"web3_clientVersion","params":[],"id":67}' \
{{ include "aztec-network.ethereumHost" . }} | grep -q anvil; do
echo "Waiting for Ethereum node..."
sleep 5
done
echo "Ethereum node is ready!"
{{- if .Values.telemetry.enabled }}
until curl --head --silent {{ include "aztec-network.otelCollectorMetricsEndpoint" . }} > /dev/null; do
echo "Waiting for OpenTelemetry collector..."
sleep 5
done
echo "OpenTelemetry collector is ready!"
{{- end }}
until curl --head --silent {{ include "aztec-network.bootNodeUrl" . }}/status; do
echo "Waiting for boot node..."
sleep 5
done

- name: configure-validator-env
image: "{{ .Values.images.aztec.image }}"
imagePullPolicy: {{ .Values.images.aztec.pullPolicy }}
Expand Down Expand Up @@ -50,7 +75,7 @@ spec:
command:
- "/bin/bash"
- "-c"
- "source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
- "sleep 10 && source /shared/contracts.env && env && node --no-warnings /usr/src/yarn-project/aztec/dest/bin/index.js start --node --archiver --sequencer"
Copy link
Collaborator

@ludamad ludamad Oct 10, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, all sleeps would be wait loops; otherwise we invite mysterious flakes (sleeps have been the culprit behind many so far). But if this gets us unblocked, it can be iterated on.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, this is to allow the k8s DNS name to get set up. It runs before the node even starts, so it shouldn't invite too much flakiness.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would still be ideal to query k8s DNS if possible, but yeah, no big deal.

volumeMounts:
- name: shared-volume
mountPath: /shared
Expand Down
26 changes: 8 additions & 18 deletions spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,10 @@ network:
public: false
enableBots: true

telemetry:
enabled: false
otelCollectorEndpoint:

images:
aztec:
image: aztecprotocol/aztec
Expand Down Expand Up @@ -31,13 +35,14 @@ bootNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "4"
storage: "8Gi"

validator:
replicas: 1
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
validatorAddresses:
- 0xf39Fd6e51aad88F6F4ce6aB8827279cffFb92266
service:
p2pPort: 40400
nodePort: 8080
Expand All @@ -54,9 +59,6 @@ validator:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "28Gi"
cpu: "7"
storage: "8Gi"

proverNode:
Expand All @@ -71,9 +73,6 @@ proverNode:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "120Gi"
cpu: "15"
storage: "8Gi"

pxe:
Expand All @@ -93,9 +92,6 @@ pxe:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

bot:
logLevel: "debug"
Expand Down Expand Up @@ -124,9 +120,6 @@ bot:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"

ethereum:
replicas: 1
Expand All @@ -152,7 +145,4 @@ ethereum:
requests:
memory: "2Gi"
cpu: "200m"
limits:
memory: "4Gi"
cpu: "1"
storage: "8Gi"
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/16-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics Helm chart before using this values file.
# Go to spartan/metrics and run `./install.sh`
# (then run `./forward.sh` if you want to view it).
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

bootNode:
sequencer:
minTxsPerBlock: 4
Expand Down
10 changes: 10 additions & 0 deletions spartan/aztec-network/values/48-validators.yaml
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
##########
# BEWARE #
##########
# You need to deploy the metrics Helm chart before using this values file.
# Go to spartan/metrics and run `./install.sh`
# (then run `./forward.sh` if you want to view it).
telemetry:
enabled: true
otelCollectorEndpoint: http://metrics-opentelemetry-collector.metrics:4318

validator:
debug: "aztec:*,-aztec:avm_simulator:*,-aztec:libp2p_service"
replicas: 48
Expand Down
Loading