Skip to content

Commit

Permalink
fix: multi-node metrics working (#9486)
Browse files Browse the repository at this point in the history
After this PR:
- switched the OpenTelemetry collector in the metrics helm chart from daemonset
mode to deployment mode, meaning it is just a normal endpoint and no longer tries to do k8s-aware log scraping
- Now code that passes LOG_JSON and OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
will use winston otel transport to log to that otel endpoint (making its
way into grafana eventually)
- Adjustments in helm chart and winston transport side enabling logging,
see working here:
<img width="1228" alt="Screenshot 2024-10-29 at 8 38 01 PM"
src="https://github.com/user-attachments/assets/c141eac7-8ea6-4ed6-9ba9-5b181e89775c">

This was from the scripts/run_native_testnet_with_metrics.sh script that
got the publicly available metrics deployment and pointed the local
testnet scripts at it.
- minor fix for earthly s3 caching
- new post_deploy_spartan.sh script that runs 'network-bootstrap' that
initializes the network with some key test contracts
- refactor logging a bit so that the negative patterns are useful for all
logging pathways, and extract the logic added for the offsite demo into its own
functional wrapper for adding 'fixed data'
- Closes #9234
  • Loading branch information
ludamad authored Oct 30, 2024
1 parent bdff73a commit fd974e1
Show file tree
Hide file tree
Showing 26 changed files with 431 additions and 81 deletions.
8 changes: 4 additions & 4 deletions build-system/s3-cache-scripts/earthly-s3-cache.sh
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ function s3_upload() {
if [ "${S3_BUILD_CACHE_UPLOAD:-true}" = "false" ] || [ "${AWS_ACCESS_KEY_ID}" == "" ] ; then
return 0 # exit silently
fi
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts || echo "WARNING: S3 upload failed!" >&2
}
function minio_download() {
if [ -z "$S3_BUILD_CACHE_MINIO_URL" ] ; then
Expand All @@ -35,7 +35,7 @@ function minio_upload() {
fi
# minio is S3-compatible
S3_BUILD_CACHE_AWS_PARAMS="--endpoint-url $S3_BUILD_CACHE_MINIO_URL" AWS_SECRET_ACCESS_KEY=minioadmin AWS_ACCESS_KEY_ID=minioadmin \
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts
/usr/src/build-system/s3-cache-scripts/cache-upload.sh "$FILE" $build_artifacts || echo "WARNING Minio upload failed!" >&2
}

# commands
Expand All @@ -53,5 +53,5 @@ if ! bash -c "$command" ; then
exit 1 # we have failed to build, don't continue
fi

minio_upload || echo "Minio upload failed!"
s3_upload || echo "S3 upload failed!"
minio_upload
s3_upload
25 changes: 25 additions & 0 deletions scripts/run_native_testnet_with_metrics.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
#!/bin/bash
# Looks up the OpenTelemetry collector exposed by a metrics deployment in the
# current kubectl context, exports the OTEL endpoint variables, and then starts
# run_native_testnet.sh from this script's directory.
set -eu

# First positional argument, defaulting to "staging".
# NOTE(review): NAMESPACE is not referenced again in this script (the collector
# lookup below uses the literal "metrics" namespace) — confirm whether it is needed.
NAMESPACE=${1:-staging}

echo "Trying to port forward. NOTE: Must be using a production k8s context with metrics chart."

# Helper function to get load balancer URL based on namespace and service name.
# Arguments: $1 - namespace to query
#            $2 - exact metadata.name of the service
# Outputs:   whatever kubectl prints for the first load balancer ingress hostname
#            of the matching service (stdout)
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # The namespace is quoted so it always reaches kubectl as exactly one argument.
  kubectl get svc -n "$namespace" -o jsonpath="{.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Build the OTLP/HTTP base URL from the load balancer of the collector service
# in the "metrics" namespace.
OTEL_URL=http://$(get_load_balancer_url metrics metrics-opentelemetry-collector):4318

# Per-signal OTLP/HTTP endpoints. The trace path is /v1/traces (plural), matching
# the /v1/metrics and /v1/logs paths used for the other signals.
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="$OTEL_URL/v1/metrics"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="$OTEL_URL/v1/traces"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="$OTEL_URL/v1/logs"
export LOG_JSON=1

# re-enter script dir (quoted so paths containing spaces work)
cd "$(dirname "${BASH_SOURCE[0]}")"
# Forward all arguments unchanged; "$@" keeps arguments with whitespace intact.
./run_native_testnet.sh "$@"
7 changes: 7 additions & 0 deletions spartan/aztec-network/templates/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,13 @@ http://{{ include "aztec-network.fullname" . }}-metrics.{{ .Release.Namespace }}
{{- end -}}
{{- end -}}

{{/*
Renders the OTLP logs endpoint: the value of .Values.telemetry.otelCollectorEndpoint
followed by "/v1/logs". Renders nothing unless both .Values.telemetry.enabled and
.Values.telemetry.otelCollectorEndpoint are truthy.
*/}}
{{- define "aztec-network.otelCollectorLogsEndpoint" -}}
{{- if .Values.telemetry.enabled -}}
{{- if .Values.telemetry.otelCollectorEndpoint -}}
{{- .Values.telemetry.otelCollectorEndpoint -}}/v1/logs
{{- end -}}
{{- end -}}
{{- end -}}

{{- define "helpers.flag" -}}
{{- $name := index . 0 -}}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,8 @@ spec:
value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
ports:
- containerPort: {{ .Values.bootNode.service.nodePort }}
- containerPort: {{ .Values.bootNode.service.p2pTcpPort }}
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,8 @@ spec:
value: {{ include "aztec-network.otelCollectorMetricsEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_TRACES_ENDPOINT
value: {{ include "aztec-network.otelCollectorTracesEndpoint" . | quote }}
- name: OTEL_EXPORTER_OTLP_LOGS_ENDPOINT
value: {{ include "aztec-network.otelCollectorLogsEndpoint" . | quote }}
ports:
- containerPort: {{ .Values.validator.service.nodePort }}
- containerPort: {{ .Values.validator.service.p2pTcpPort }}
Expand Down
4 changes: 3 additions & 1 deletion spartan/metrics/install-prod.sh
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
#!/bin/bash
set -eu

helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace --atomic
cd "$(dirname "${BASH_SOURCE[0]}")"

helm upgrade metrics . -n metrics --values "./values/prod.yaml" --install --create-namespace --atomic $@
2 changes: 2 additions & 0 deletions spartan/metrics/install.sh
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
#!/bin/bash
set -eu

cd "$(dirname "${BASH_SOURCE[0]}")"

helm repo add open-telemetry https://open-telemetry.github.io/opentelemetry-helm-charts
helm repo add grafana https://grafana.github.io/helm-charts
helm repo add prometheus-community https://prometheus-community.github.io/helm-charts
Expand Down
5 changes: 1 addition & 4 deletions spartan/metrics/values.yaml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
opentelemetry-collector:
mode: daemonset
mode: deployment

service:
enabled: true
Expand Down Expand Up @@ -28,9 +28,6 @@ opentelemetry-collector:
protocol: TCP

presets:
logsCollection:
enabled: true
includeCollectorLogs: true
kubernetesAttributes:
enabled: true
config:
Expand Down
15 changes: 9 additions & 6 deletions spartan/scripts/deploy_spartan.sh
Original file line number Diff line number Diff line change
@@ -1,10 +1,15 @@
#!/bin/bash
set -eux
set -eu
set -o pipefail

TAG=$1
VALUES=$2
NAMESPACE=${3:-spartan}
PROD=${4:-true}
PROD_ARGS=""
if [ "$PROD" = "true" ] ; then
PROD_ARGS="--set network.public=true --set telemetry.enabled=true --set telemetry.otelCollectorEndpoint=http://metrics-opentelemetry-collector.metrics:4318"
fi
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

if [ -z "$TAG" ]; then
Expand Down Expand Up @@ -46,16 +51,14 @@ function upgrade() {
helm template $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--set images.aztec.image="$IMAGE" \
--set network.public=true
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml $PROD_ARGS \
--set images.aztec.image="$IMAGE"
else
helm upgrade --install $NAMESPACE $SCRIPT_DIR/../aztec-network \
--namespace $NAMESPACE \
--create-namespace \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml \
--values $SCRIPT_DIR/../aztec-network/values/$VALUES.yaml $PROD_ARGS \
--set images.aztec.image="$IMAGE" \
--set network.public=true \
--wait \
--wait-for-jobs=true \
--timeout=30m 2>&1
Expand Down
41 changes: 41 additions & 0 deletions spartan/scripts/post_deploy_spartan.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
#!/bin/bash
# Targets a running cluster and deploys example contracts for testing
#
# Usage: post_deploy_spartan.sh [<namespace>] [<tag>]
#   namespace - first argument, defaults to "spartan"
#   tag       - second argument, defaults to "latest"
set -eu
set -o pipefail

# NAMESPACE always receives a default below, so an "is it empty" check can never
# trigger; the usage text is instead shown when help is explicitly requested.
case "${1:-}" in
  -h|--help)
    echo "Usage: $0 (optional: <namespace>) (optional: <tag>)"
    echo "Example: $0 devnet"
    exit 0
    ;;
esac

echo "Bootstrapping network with test contracts"

NAMESPACE=${1:-spartan}
TAG=${2:-latest}
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"

# Helper function to get load balancer URL based on namespace and service name.
# Arguments: $1 - namespace to query
#            $2 - exact metadata.name of the service
# Outputs:   whatever kubectl prints for the first load balancer ingress hostname
#            of the matching service (stdout)
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # The namespace is quoted so it always reaches kubectl as exactly one argument.
  kubectl get svc -n "$namespace" -o jsonpath="{.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Resolve the load balancer hostnames of the namespace's services.
# Each lookup is assigned on its own line, separately from the export, so that a
# failing kubectl call aborts the script under 'set -e' instead of being hidden
# behind export's own (always successful) exit status.
bootnode_host=$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-boot-node-lb-tcp")
pxe_host=$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-pxe-lb")
ethereum_host=$(get_load_balancer_url "$NAMESPACE" "$NAMESPACE-aztec-network-ethereum-lb")
export BOOTNODE_URL="http://${bootnode_host}:8080"
export PXE_URL="http://${pxe_host}:8080"
export ETHEREUM_HOST="http://${ethereum_host}:8545"

echo "BOOTNODE_URL: $BOOTNODE_URL"
echo "PXE_URL: $PXE_URL"
echo "ETHEREUM_HOST: $ETHEREUM_HOST"

echo "Bootstrapping contracts for test network. NOTE: This took one hour last run."
# Run the image's bootstrap-network command against the resolved endpoints; the
# output is shown on stdout and also written to ./basic_contracts.json in the
# current working directory.
# NOTE(review): the L1 private key is hard-coded; it looks like a local dev-chain
# key (chain id 31337) — confirm it is never used against a real network.
docker run "aztecprotocol/aztec:$TAG" bootstrap-network \
  --rpc-url "$BOOTNODE_URL" \
  --l1-rpc-url "$ETHEREUM_HOST" \
  --l1-chain-id 31337 \
  --l1-private-key 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80 \
  --json | tee ./basic_contracts.json
15 changes: 11 additions & 4 deletions spartan/scripts/test_spartan.sh
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,22 @@ fi

echo "Note: Repo should be bootstrapped with ./bootstrap.sh fast."

# Helper function to get load balancer URL based on namespace and service name.
# Arguments: $1 - namespace to query
#            $2 - exact metadata.name of the service
# Outputs:   whatever kubectl prints for the first load balancer ingress hostname
#            of the matching service (stdout)
function get_load_balancer_url() {
  local namespace=$1
  local service_name=$2
  # The namespace is quoted so it always reaches kubectl as exactly one argument.
  kubectl get svc -n "$namespace" -o jsonpath="{.items[?(@.metadata.name=='$service_name')].status.loadBalancer.ingress[0].hostname}"
}

# Fetch the service URLs based on the namespace for injection in the test-transfer.sh
export BOOTNODE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-boot-node-lb-tcp')].status.loadBalancer.ingress[0].hostname}"):8080
export PXE_URL=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-pxe-lb')].status.loadBalancer.ingress[0].hostname}"):8080
export ETHEREUM_HOST=http://$(kubectl get svc -n $NAMESPACE -o jsonpath="{.items[?(@.metadata.name=='$NAMESPACE-aztec-network-ethereum-lb')].status.loadBalancer.ingress[0].hostname}"):8545
export BOOTNODE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-boot-node-lb-tcp"):8080
export PXE_URL=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-pxe-lb"):8080
export ETHEREUM_HOST=http://$(get_load_balancer_url $NAMESPACE "$NAMESPACE-aztec-network-ethereum-lb"):8545

echo "BOOTNODE_URL: $BOOTNODE_URL"
echo "PXE_URL: $PXE_URL"
echo "ETHEREUM_HOST: $ETHEREUM_HOST"

# hack to ensure L2 contracts are considered deployed
touch $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/state/l2-contracts.env
bash -x $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/test-transfer.sh
bash -x $SCRIPT_DIR/../../yarn-project/end-to-end/scripts/native-network/test-4epochs.sh
3 changes: 2 additions & 1 deletion yarn-project/Earthfile
Original file line number Diff line number Diff line change
Expand Up @@ -283,6 +283,7 @@ prover-client-test:
# Running this inside the main builder as the point is not to run this through dockerization.
network-test:
ARG test=./test-transfer.sh
ARG validators=3
FROM +build
WORKDIR /usr/src/
# Bare minimum git setup to run 'git rev-parse --show-toplevel'
Expand All @@ -299,7 +300,7 @@ network-test:
./ethereum.sh \
"./prover-node.sh 8078 false" \
./pxe.sh \
"./validators.sh 3"
"./validators.sh $validators"

publish-npm:
FROM +build
Expand Down
1 change: 1 addition & 0 deletions yarn-project/aztec/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
"@aztec/telemetry-client": "workspace:^",
"@aztec/txe": "workspace:^",
"@aztec/types": "workspace:^",
"@opentelemetry/winston-transport": "^0.7.0",
"@types/chalk": "^2.2.0",
"abitype": "^0.8.11",
"chalk": "^5.3.0",
Expand Down
28 changes: 4 additions & 24 deletions yarn-project/aztec/src/logging.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import { currentLevel, onLog, setLevel } from '@aztec/foundation/log';

import { OpenTelemetryTransportV3 } from '@opentelemetry/winston-transport';
import * as path from 'path';
import * as process from 'process';
import * as winston from 'winston';
Expand Down Expand Up @@ -30,36 +31,15 @@ function createWinstonLocalFileLogger() {
});
}

function extractNegativePatterns(debugString: string): string[] {
return (
debugString
.split(',')
.filter(p => p.startsWith('-'))
// Remove the leading '-' from the pattern
.map(p => p.slice(1))
// Remove any '*' from the pattern
.map(p => p.replace('*', ''))
);
}

/** Creates a winston logger that logs everything to stdout in json format */
function createWinstonJsonStdoutLogger(
debugString: string = process.env.DEBUG ??
'aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*',
) {
const ignorePatterns = extractNegativePatterns(debugString);
const ignoreAztecPattern = format(info => {
if (ignorePatterns.some(pattern => info.module.startsWith(pattern))) {
return false; // Skip logging this message
}
return info;
});
function createWinstonJsonStdoutLogger() {
return winston.createLogger({
level: currentLevel,
transports: [
new winston.transports.Console({
format: format.combine(format.timestamp(), ignoreAztecPattern(), format.json()),
format: format.combine(format.timestamp(), format.json()),
}),
new OpenTelemetryTransportV3(),
],
});
}
Expand Down
5 changes: 3 additions & 2 deletions yarn-project/end-to-end/scripts/native-network/boot-node.sh
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,9 @@ export P2P_TCP_ANNOUNCE_ADDR="127.0.0.1:40400"
export P2P_UDP_ANNOUNCE_ADDR="127.0.0.1:40400"
export P2P_TCP_LISTEN_ADDR="0.0.0.0:40400"
export P2P_UDP_LISTEN_ADDR="0.0.0.0:40400"
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT=""
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT=""
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-}"
export VALIDATOR_PRIVATE_KEY="0x47e179ec197488593b187f80a00eb0da91f1b9d0b13f8733639f19c30a34926a"
REPO=$(git rev-parse --show-toplevel)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,6 @@ echo "Done waiting."
# Set environment variables
export ETHEREUM_HOST="http://127.0.0.1:8545"
export AZTEC_NODE_URL="http://127.0.0.1:8080"
export LOG_JSON="1"
export LOG_LEVEL=${LOG_LEVEL:-"debug"}
export DEBUG="aztec:*,-aztec:avm_simulator*,-aztec:libp2p_service*,-aztec:circuits:artifact_hash,-json-rpc*,-aztec:l2_block_stream,-aztec:world-state:*"
export BOT_PRIVATE_KEY="0xcafe"
Expand Down
3 changes: 3 additions & 0 deletions yarn-project/end-to-end/scripts/native-network/validator.sh
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,9 @@ export P2P_TCP_ANNOUNCE_ADDR="127.0.0.1:$P2P_PORT"
export P2P_UDP_ANNOUNCE_ADDR="127.0.0.1:$P2P_PORT"
export P2P_TCP_LISTEN_ADDR="0.0.0.0:$P2P_PORT"
export P2P_UDP_LISTEN_ADDR="0.0.0.0:$P2P_PORT"
export OTEL_EXPORTER_OTLP_METRICS_ENDPOINT="${OTEL_EXPORTER_OTLP_METRICS_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_TRACES_ENDPOINT="${OTEL_EXPORTER_OTLP_TRACES_ENDPOINT:-}"
export OTEL_EXPORTER_OTLP_LOGS_ENDPOINT="${OTEL_EXPORTER_OTLP_LOGS_ENDPOINT:-}"

# Add L1 validator
# this may fail, so try 3 times
Expand Down
1 change: 1 addition & 0 deletions yarn-project/foundation/src/config/env_var.ts
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ export type EnvVar =
| 'NOMISMATOKOPIO_CONTRACT_ADDRESS'
| 'OTEL_EXPORTER_OTLP_METRICS_ENDPOINT'
| 'OTEL_EXPORTER_OTLP_TRACES_ENDPOINT'
| 'OTEL_EXPORTER_OTLP_LOGS_ENDPOINT'
| 'OTEL_SERVICE_NAME'
| 'OUTBOX_CONTRACT_ADDRESS'
| 'P2P_BLOCK_CHECK_INTERVAL_MS'
Expand Down
Loading

0 comments on commit fd974e1

Please sign in to comment.