Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: persistence in helm chart for validator and boot node #10543

Merged
merged 1 commit into from
Dec 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions spartan/aztec-network/templates/boot-node.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,14 @@ spec:
matchLabels:
{{- include "aztec-network.selectorLabels" . | nindent 6 }}
app: boot-node
volumeClaimTemplates:
- metadata:
name: boot-node-data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: {{ .Values.bootNode.storageSize }}
template:
metadata:
labels:
Expand Down Expand Up @@ -119,6 +127,8 @@ spec:
mountPath: /shared/p2p
- name: config
mountPath: /shared/config
- name: boot-node-data
mountPath: {{ .Values.bootNode.dataDir }}
{{- if .Values.bootNode.deployContracts }}
- name: scripts-output
mountPath: /shared/contracts
Expand Down Expand Up @@ -182,6 +192,9 @@ spec:
emptyDir: {}
- name: config
emptyDir: {}
- name: boot-node-data
persistentVolumeClaim:
claimName: boot-node-data
{{- if .Values.bootNode.deployContracts }}
- name: scripts
configMap:
Expand Down
14 changes: 13 additions & 1 deletion spartan/aztec-network/templates/validator.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,14 @@ spec:
matchLabels:
{{- include "aztec-network.selectorLabels" . | nindent 6 }}
app: validator
volumeClaimTemplates:
- metadata:
name: validator-data
spec:
accessModes: [ "ReadWriteOnce" ]
resources:
requests:
storage: {{ .Values.validator.storageSize }}
template:
metadata:
labels:
Expand Down Expand Up @@ -53,7 +61,6 @@ spec:
{{- end }}

if [ "{{ .Values.validator.dynamicBootNode }}" = "true" ]; then
# Get the list of pod IPs for the validator service
echo "{{ include "aztec-network.pxeUrl" . }}" > /shared/pxe/pxe_url
else
until curl --silent --head --fail "${BOOT_NODE_HOST}/status" > /dev/null; do
Expand Down Expand Up @@ -136,6 +143,8 @@ spec:
mountPath: /shared/p2p
- name: config
mountPath: /shared/config
- name: validator-data
mountPath: {{ .Values.validator.dataDir }}
env:
- name: POD_IP
valueFrom:
Expand Down Expand Up @@ -197,6 +206,9 @@ spec:
emptyDir: {}
- name: config
emptyDir: {}
- name: validator-data
persistentVolumeClaim:
claimName: validator-data
---
# If this is not a public network, create a headless service for StatefulSet DNS entries
{{ if not .Values.network.public }}
Expand Down
6 changes: 5 additions & 1 deletion spartan/aztec-network/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,9 @@ bootNode:
outboxAddress: ""
feeJuiceAddress: ""
feeJuicePortalAddress: ""
storage: "8Gi"
stakingAssetAddress: ""
storageSize: "1Gi"
dataDir: "/data"

validator:
# If true, the validator will use its peers to serve as the boot node.
Expand Down Expand Up @@ -108,6 +110,8 @@ validator:
requests:
memory: "2Gi"
cpu: "200m"
storageSize: "1Gi"
dataDir: "/data"

proverNode:
externalHost: ""
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/values/4-validators-with-metrics.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ validator:
- 0x90F79bf6EB2c4f870365E785982E1f101E93b906
validator:
disabled: false
sequencer:
enforceTimeTable: false

bootNode:
validator:
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/values/exp-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ images:
pullPolicy: Always

validator:
storageSize: "100Gi"
replicas: 48
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
Expand Down Expand Up @@ -124,6 +125,7 @@ validator:

bootNode:
peerIdPrivateKey: 080212200ba8451c6d62b03c4441f0a466c0bce7a3a595f2cf50a055ded3305c77aa3af0
storageSize: "100Gi"
validator:
disabled: true

Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/values/rc-1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ telemetry:
otelCollectorEndpoint: http://35.197.100.168:4318

validator:
storageSize: "100Gi"
replicas: 48
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
Expand Down Expand Up @@ -125,6 +126,7 @@ bootNode:
peerIdPrivateKey: 080212200ba8451c6d62b03c4441f0a466c0bce7a3a595f2cf50a055ded3305c77aa3af0
validator:
disabled: true
storageSize: "100Gi"

proverAgent:
replicas: 8
Expand Down
2 changes: 2 additions & 0 deletions spartan/aztec-network/values/rc-2.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ telemetry:

validator:
replicas: 48
storageSize: "100Gi"
validatorKeys:
- 0xac0974bec39a17e36ba4a6b4d238ff944bacb478cbed5efcae784d7bf4f2ff80
- 0x59c6995e998f97a5a0044966f0945389dc9e86dae88c7a8412f4603b6b78690d
Expand Down Expand Up @@ -122,6 +123,7 @@ validator:
disabled: false

bootNode:
storageSize: "100Gi"
peerIdPrivateKey: 080212200ba8451c6d62b03c4441f0a466c0bce7a3a595f2cf50a055ded3305c77aa3af0
validator:
disabled: true
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/archiver/src/archiver/archiver.ts
Original file line number Diff line number Diff line change
Expand Up @@ -174,7 +174,7 @@ export class Archiver implements ArchiveSource {
pollingIntervalMs: config.archiverPollingIntervalMS ?? 10_000,
batchSize: config.archiverBatchSize ?? 100,
},
new ArchiverInstrumentation(telemetry, () => archiverStore.estimateSize()),
await ArchiverInstrumentation.new(telemetry, () => archiverStore.estimateSize()),
{ l1StartBlock, l1GenesisTime, epochDuration, slotDuration, ethereumSlotDuration },
);
await archiver.start(blockUntilSynced);
Expand Down
19 changes: 18 additions & 1 deletion yarn-project/archiver/src/archiver/instrumentation.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,14 @@ export class ArchiverInstrumentation {
private blockHeight: Gauge;
private blockSize: Gauge;
private syncDuration: Histogram;
private l1BlocksSynced: UpDownCounter;
private proofsSubmittedDelay: Histogram;
private proofsSubmittedCount: UpDownCounter;
private dbMetrics: LmdbMetrics;

private log = createLogger('archiver:instrumentation');

constructor(private telemetry: TelemetryClient, lmdbStats?: LmdbStatsCallback) {
private constructor(private telemetry: TelemetryClient, lmdbStats?: LmdbStatsCallback) {
const meter = telemetry.getMeter('Archiver');
this.blockHeight = meter.createGauge(Metrics.ARCHIVER_BLOCK_HEIGHT, {
description: 'The height of the latest block processed by the archiver',
Expand Down Expand Up @@ -59,6 +60,11 @@ export class ArchiverInstrumentation {
},
});

this.l1BlocksSynced = meter.createUpDownCounter(Metrics.ARCHIVER_L1_BLOCKS_SYNCED, {
description: 'Number of blocks synced from L1',
valueType: ValueType.INT,
});

this.dbMetrics = new LmdbMetrics(
meter,
{
Expand All @@ -77,13 +83,24 @@ export class ArchiverInstrumentation {
);
}

/**
 * Async factory for the archiver instrumentation.
 *
 * Builds the instrumentation, records a zero increment on the L1-blocks-synced counter and
 * waits for the telemetry client to flush before handing the instance back.
 *
 * @param telemetry - Telemetry client passed to the constructor and flushed before returning.
 * @param lmdbStats - Optional LMDB stats callback, forwarded to the constructor.
 * @returns The constructed instrumentation, after the flush has completed.
 */
public static async new(telemetry: TelemetryClient, lmdbStats?: LmdbStatsCallback) {
  const instrumentation = new ArchiverInstrumentation(telemetry, lmdbStats);

  // NOTE(review): presumably the zero increment makes the counter visible in the exported
  // metrics before any block has been processed — confirm against the consumer of the metric.
  instrumentation.l1BlocksSynced.add(0);
  await instrumentation.telemetry.flush();

  return instrumentation;
}

/**
 * Reports whether telemetry is enabled.
 * @returns Whatever the underlying telemetry client answers for `isEnabled()`.
 */
public isEnabled(): boolean {
return this.telemetry.isEnabled();
}

public processNewBlocks(syncTimePerBlock: number, blocks: L2Block[]) {
this.syncDuration.record(Math.ceil(syncTimePerBlock));
this.blockHeight.record(Math.max(...blocks.map(b => b.number)));
this.l1BlocksSynced.add(blocks.length);
for (const block of blocks) {
this.blockSize.record(block.body.txEffects.length);
}
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/end-to-end/scripts/network_test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -180,5 +180,5 @@ docker run --rm --network=host \
-e GRAFANA_PASSWORD=$GRAFANA_PASSWORD \
-e DEBUG=${DEBUG:-""} \
-e LOG_JSON=1 \
-e LOG_LEVEL=verbose \
-e LOG_LEVEL=${LOG_LEVEL:-"verbose"} \
aztecprotocol/end-to-end:$AZTEC_DOCKER_TAG $TEST
30 changes: 26 additions & 4 deletions yarn-project/end-to-end/src/quality_of_service/alert_checker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ import * as yaml from 'js-yaml';
export interface AlertConfig {
alert: string;
expr: string;
start?: number;
end?: number;
step?: number;
for: string;
labels: Record<string, string>;
annotations: Record<string, string>;
Expand All @@ -18,7 +21,7 @@ export interface AlertCheckerConfig {

// This config is good if you're running the otel-lgtm stack locally
const DEFAULT_CONFIG: AlertCheckerConfig = {
grafanaEndpoint: 'http://localhost:3000/api/datasources/proxy/uid/prometheus/api/v1/query',
grafanaEndpoint: 'http://localhost:3000/api/datasources/proxy/uid/prometheus/api/v1',
grafanaCredentials: 'admin:admin',
};

Expand All @@ -41,10 +44,29 @@ export class AlertChecker {
return data.alerts;
}

private async queryGrafana(expr: string): Promise<number> {
private async queryGrafana({ expr, start, end, step }: AlertConfig): Promise<number> {
const credentials = Buffer.from(this.config.grafanaCredentials).toString('base64');

const response = await fetch(`${this.config.grafanaEndpoint}?query=${encodeURIComponent(expr)}`, {
let query = `query=${encodeURIComponent(expr)}`;
let action = 'query';

if (start) {
action = 'query_range';
query += `&start=${start}`;
}

if (end) {
query += `&end=${end}`;
}

if (step) {
query += `&step=${step}`;
}

const urlString = `${this.config.grafanaEndpoint}/${action}?${query}`;
this.logger.debug(`Querying Grafana: ${urlString}`);

const response = await fetch(urlString, {
headers: {
Authorization: `Basic ${credentials}`,
},
Expand All @@ -65,7 +87,7 @@ export class AlertChecker {
for (const alert of alerts) {
this.logger.info(`Checking alert: ${JSON.stringify(alert)}`);

const metricValue = await this.queryGrafana(alert.expr);
const metricValue = await this.queryGrafana(alert);
this.logger.info(`Metric value: ${metricValue}`);
if (metricValue > 0) {
this.logger.error(`Alert ${alert.alert} triggered! Value: ${metricValue}`);
Expand Down
22 changes: 15 additions & 7 deletions yarn-project/end-to-end/src/spartan/gating-passive.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,14 @@ const qosAlerts: AlertConfig[] = [
for: '10m',
annotations: {},
},
{
// Checks that we are not syncing from scratch each time we reboot
alert: 'ArchiverL1BlocksSynced',
expr: 'rate(aztec_archiver_l1_blocks_synced[1m]) > 0.5',
labels: { severity: 'error' },
for: '10m',
annotations: {},
},
];

const config = setupEnvironment(process.env);
Expand All @@ -52,6 +60,12 @@ describe('a test that passively observes the network in the presence of network
const MAX_MISSED_SLOT_PERCENT = 0.6;

// After all tests: open a port-forward to the Grafana metrics service, then run the QoS alert check.
afterAll(async () => {
// Forward svc/metrics-grafana (namespace "metrics") from the configured container port to the configured host port.
await startPortForward({
resource: `svc/metrics-grafana`,
namespace: 'metrics',
containerPort: config.CONTAINER_METRICS_PORT,
hostPort: config.HOST_METRICS_PORT,
});
// NOTE(review): presumably runAlertCheck reaches Grafana through the forward opened above — confirm.
await runAlertCheck(config, qosAlerts, debugLogger);
});

Expand All @@ -69,12 +83,6 @@ describe('a test that passively observes the network in the presence of network
hostPort: HOST_ETHEREUM_PORT,
});

await startPortForward({
resource: `svc/metrics-grafana`,
namespace: 'metrics',
containerPort: config.CONTAINER_METRICS_PORT,
hostPort: config.HOST_METRICS_PORT,
});
const client = await createCompatibleClient(PXE_URL, debugLogger);
const ethCheatCodes = new EthCheatCodes(ETHEREUM_HOST);
const rollupCheatCodes = new RollupCheatCodes(
Expand All @@ -93,7 +101,7 @@ describe('a test that passively observes the network in the presence of network
// note, don't forget that normally an epoch doesn't need epochDuration worth of blocks,
// but here we do double duty:
// we want a handful of blocks, and we want to pass the epoch boundary
await awaitL2BlockNumber(rollupCheatCodes, epochDuration, 60 * 5, debugLogger);
await awaitL2BlockNumber(rollupCheatCodes, epochDuration, 60 * 6, debugLogger);

let deploymentOutput: string = '';
deploymentOutput = await applyNetworkShaping({
Expand Down
2 changes: 1 addition & 1 deletion yarn-project/end-to-end/src/spartan/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ const k8sLocalConfigSchema = z.object({
HOST_METRICS_PORT: z.coerce.number().min(1, 'HOST_METRICS_PORT env variable must be set'),
CONTAINER_METRICS_PORT: z.coerce.number().default(80),
GRAFANA_PASSWORD: z.string().min(1, 'GRAFANA_PASSWORD env variable must be set'),
METRICS_API_PATH: z.string().default('/api/datasources/proxy/uid/spartan-metrics-prometheus/api/v1/query'),
METRICS_API_PATH: z.string().default('/api/datasources/proxy/uid/spartan-metrics-prometheus/api/v1'),
SPARTAN_DIR: z.string().min(1, 'SPARTAN_DIR env variable must be set'),
K8S: z.literal('local'),
});
Expand Down
1 change: 1 addition & 0 deletions yarn-project/telemetry-client/src/metrics.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export const MEMPOOL_PROVER_QUOTE_COUNT = 'aztec.mempool.prover_quote_count';
export const MEMPOOL_PROVER_QUOTE_SIZE = 'aztec.mempool.prover_quote_size';

export const ARCHIVER_SYNC_DURATION = 'aztec.archiver.sync_duration';
export const ARCHIVER_L1_BLOCKS_SYNCED = 'aztec.archiver.l1_blocks_synced';
export const ARCHIVER_BLOCK_HEIGHT = 'aztec.archiver.block_height';
export const ARCHIVER_BLOCK_SIZE = 'aztec.archiver.block_size';
export const ARCHIVER_ROLLUP_PROOF_DELAY = 'aztec.archiver.rollup_proof_delay';
Expand Down
4 changes: 4 additions & 0 deletions yarn-project/telemetry-client/src/noop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,10 @@ export class NoopTelemetryClient implements TelemetryClient {
return Promise.resolve();
}

/**
 * Flush is a no-op for this client.
 * @returns An already-resolved promise.
 */
flush(): Promise<void> {
return Promise.resolve();
}

/** Always returns false: this client never reports telemetry as enabled. */
isEnabled() {
return false;
}
Expand Down
8 changes: 8 additions & 0 deletions yarn-project/telemetry-client/src/otel.ts
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,14 @@ export class OpenTelemetryClient implements TelemetryClient {
return true;
}

/**
 * Forces a flush of the meter and logger providers, and of the trace provider when it is a
 * `NodeTracerProvider`. Resolves once every requested flush has completed and rejects if any
 * of them rejects.
 */
public async flush() {
  const pending: Promise<void>[] = [this.meterProvider.forceFlush(), this.loggerProvider.forceFlush()];

  // Only a NodeTracerProvider is flushed here; any other trace provider is left alone.
  if (this.traceProvider instanceof NodeTracerProvider) {
    pending.push(this.traceProvider.forceFlush());
  }

  await Promise.all(pending);
}

public async stop() {
const flushAndShutdown = async (provider: { forceFlush: () => Promise<void>; shutdown: () => Promise<void> }) => {
await provider.forceFlush();
Expand Down
7 changes: 6 additions & 1 deletion yarn-project/telemetry-client/src/telemetry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ import {
import * as Attributes from './attributes.js';
import * as Metrics from './metrics.js';

export { ValueType, Span } from '@opentelemetry/api';
export { Span, ValueType } from '@opentelemetry/api';

type ValuesOf<T> = T extends Record<string, infer U> ? U : never;

Expand Down Expand Up @@ -115,6 +115,11 @@ export interface TelemetryClient {
* Stops the telemetry client.
*/
stop(): Promise<void>;

/**
* Flushes the telemetry client.
*/
flush(): Promise<void>;
}

/** Objects that adhere to this interface can use @trackSpan */
Expand Down
Loading