From def0bfb04fdb21edbbf76f8668e7cd8a42274526 Mon Sep 17 00:00:00 2001
From: Hayden <22327045+hbjydev@users.noreply.github.com>
Date: Thu, 2 May 2024 00:17:11 +0100
Subject: [PATCH] Refactor ECS Agent & Alloy configurations (#7)

* feat: refactor alloy configs

* fix: remove unused input from alloy module

* fix(alloy): name for otlp exporter was invalid, fix

* fix: update casing for ecs-agent alloy config

* fix(alloy): get config working :tada:

* feat(ci): deploy on main branch

* fix(ci): drop ifs for testing porpoises

* fix(ci): add --quiet to aws s3 cp call

* fix(ci): reintroduce if: main branch checks

* fix(ci): reintroduce cachix action

* feat(ci): make the workflow cleaner to read

* feat(ci): job-end summary

* fix(ci): remove check to test with

* fix(ci): add check back in
---
 .github/workflows/flake.yaml          | 37 +++++++++---
 ci-build-publish.sh                   | 87 +++++++++++++++++++++++++++
 justfile                              | 32 +---------
 modules/mixins/alloy/config.alloy     | 42 +++++++++----
 modules/mixins/alloy/default.nix      |  2 +-
 modules/mixins/docker/config.alloy    | 12 ++++
 modules/mixins/docker/default.nix     |  7 +++
 modules/mixins/ecs-agent/config.alloy | 19 ++----
 modules/mixins/ecs-agent/default.nix  |  2 +-
 9 files changed, 171 insertions(+), 69 deletions(-)
 create mode 100644 ci-build-publish.sh
 create mode 100644 modules/mixins/docker/config.alloy

diff --git a/.github/workflows/flake.yaml b/.github/workflows/flake.yaml
index c501064..e0d0e1d 100644
--- a/.github/workflows/flake.yaml
+++ b/.github/workflows/flake.yaml
@@ -6,8 +6,8 @@ on:
     branches:
       - main
 
-# env:
-#   CACHIX_BINARY_CACHE: altf4llc-os
+env:
+  CACHIX_BINARY_CACHE: altf4llc-os
 
 jobs:
   check:
@@ -16,10 +16,10 @@ jobs:
       - uses: cachix/install-nix-action@v25
         with:
          enable_kvm: true
-      # - uses: cachix/cachix-action@v14
-      #   with:
-      #     authToken: ${{ secrets.ALTF4LLC_CACHIX_AUTH_TOKEN }}
-      #     name: ${{ env.CACHIX_BINARY_CACHE }}
+      - uses: cachix/cachix-action@v14
+        with:
+          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
+          name: ${{ env.CACHIX_BINARY_CACHE }}
       - uses: actions/checkout@v4
      - run: nix develop -c just check
 
@@ -27,6 +27,9 @@ build:
     needs:
       - check
     runs-on: ubuntu-latest
+    permissions:
+      contents: read
+      id-token: write
     strategy:
       matrix:
         profile:
@@ -36,9 +39,23 @@ build:
       - uses: cachix/install-nix-action@v25
         with:
           enable_kvm: true
-      # - uses: cachix/cachix-action@v14
-      #   with:
-      #     authToken: ${{ secrets.ALTF4LLC_CACHIX_AUTH_TOKEN }}
-      #     name: ${{ env.CACHIX_BINARY_CACHE }}
+
+      - uses: cachix/cachix-action@v14
+        with:
+          authToken: ${{ secrets.CACHIX_AUTH_TOKEN }}
+          name: ${{ env.CACHIX_BINARY_CACHE }}
+
       - uses: actions/checkout@v4
       - run: nix develop -c just build "${{ matrix.profile }}"
+
+      - uses: aws-actions/configure-aws-credentials@v4
+        if: github.ref == 'refs/heads/main'
+        with:
+          aws-region: us-west-2
+          role-to-assume: arn:aws:iam::677459762413:role/altf4llc-gha-vms-nix
+
+      - if: github.ref == 'refs/heads/main'
+        run: aws sts get-caller-identity
+
+      - if: github.ref == 'refs/heads/main'
+        run: nix develop -c just publish-ami "${{ matrix.profile }}"
diff --git a/ci-build-publish.sh b/ci-build-publish.sh
new file mode 100644
index 0000000..312c25d
--- /dev/null
+++ b/ci-build-publish.sh
@@ -0,0 +1,87 @@
+#!/usr/bin/env bash
+set -euo pipefail
+
+bucket="$1"
+profile="$2"
+
+ci="${CI:-false}"
+
+current_group=""
+group() {
+  # Starts a group (GitHub Actions)
+  current_group="$1"
+  if [[ "$ci" == "true" ]]; then
+    echo "::group::$1"
+  else
+    echo "> $1"
+  fi
+}
+
+endgroup() {
+  # Ends the group (GitHub Actions)
+  if [[ "$ci" == "true" ]]; then
+    echo "::endgroup::"
+  else
+    echo "> Finished $current_group"
+  fi
+  current_group=""
+}
+
+ciout() {
+  # Sets the value as a job output
+  if [[ "$ci" == "true" ]]; then echo "$1=$2" >> "$GITHUB_OUTPUT"; fi
+}
+
+cisum() {
+  if [[ "$ci" == "true" ]]; then
+    echo "$@" >> "$GITHUB_STEP_SUMMARY"
+  fi
+}
+
+build_time=$(date +%s)
+image_name="altf4llc-$profile-$build_time"
+ciout image_name "$image_name"
+
+group "Building source VHD"
+derivation=$(just build "$profile")
+output=$(echo "$derivation" | jq -r '.[].outputs.out')
+image_path=$(cd "$output" && ls -- *.vhd)
+endgroup
+
+group "Uploading VHD to S3"
+aws s3 cp "$output/$image_path" "s3://$bucket/$image_name.vhd" --quiet
+endgroup
+
+group "Importing VHD as snapshot in EC2"
+task_id=$(aws ec2 import-snapshot --disk-container "Format=VHD,UserBucket={S3Bucket=$bucket,S3Key=$image_name.vhd}" --output json | jq -r ".ImportTaskId")
+
+echo "Waiting for snapshot import to complete."
+until [[ $(aws ec2 describe-import-snapshot-tasks --import-task-ids "$task_id" --output json | jq -r '.ImportSnapshotTasks[].SnapshotTaskDetail.Status') == "completed" ]]; do
+  echo "Snapshot is not imported yet, waiting..."
+  sleep 5
+done
+
+snapshot_id=$(aws ec2 describe-import-snapshot-tasks --import-task-ids "$task_id" --output json | jq -r '.ImportSnapshotTasks[].SnapshotTaskDetail.SnapshotId')
+
+echo "New snapshot is $snapshot_id."
+ciout snapshot_id "$snapshot_id"
+endgroup
+
+group "Registering new AMI"
+ami_id=$(aws ec2 register-image --architecture x86_64 --ena-support --name "$image_name" --description "A NixOS AMI: $profile" --block-device-mappings "DeviceName=/dev/sda1,Ebs={SnapshotId=$snapshot_id}" --root-device-name /dev/sda1 --output json | jq -r '.ImageId')
+echo "AMI is registered: $ami_id"
+ciout ami_id "$ami_id"
+endgroup
+
+group "Cleaning up image VHD from bucket"
+aws s3 rm "s3://$bucket/$image_name.vhd"
+endgroup
+
+cisum "# :rocket: AMI build successful"
+cisum ""
+cisum "An image was successfully built for Nix profile \`$profile\`."
+cisum ""
+cisum "- Build time: \`$build_time\`"
+cisum "- VHD import job ID: \`$task_id\`"
+cisum "- AMI ID: \`$ami_id\`"
+cisum "- Snapshot ID: \`$snapshot_id\`"
diff --git a/justfile b/justfile
index f3d2a93..a6bf87e 100644
--- a/justfile
+++ b/justfile
@@ -7,34 +7,4 @@ build profile:
   nix build --json --print-build-logs --no-link '.#{{profile}}'
 
 publish-ami profile:
-  #!/usr/bin/env bash
-  set -euo pipefail
-  BUILD_TIME=$(date +%s)
-  IMAGE_NAME="altf4llc-{{profile}}-$BUILD_TIME"
-
-  DERIVATION=$(just build {{profile}})
-  OUTPUT=$(echo "$DERIVATION" | jq -r '.[].outputs.out')
-  IMAGE_PATH=$(cd "$OUTPUT" && ls *.vhd)
-
-  echo "Uploading VHD to S3."
-  aws s3 cp "$OUTPUT/$IMAGE_PATH" "s3://{{ami_bucket}}/$IMAGE_NAME.vhd"
-
-  echo "Starting snapshot import."
-  TASK_ID=$(aws ec2 import-snapshot --disk-container "Format=VHD,UserBucket={S3Bucket={{ami_bucket}},S3Key=$IMAGE_NAME.vhd}" --output json | jq -r ".ImportTaskId")
-
-  echo "Waiting for snapshot import to complete."
-  until [[ $(aws ec2 describe-import-snapshot-tasks --import-task-ids "$TASK_ID" --output json | jq -r '.ImportSnapshotTasks[].SnapshotTaskDetail.Status') == "completed" ]]; do
-    echo "Snapshot is not imported yet, waiting..."
-    sleep 5
-  done
-
-  SNAPSHOT_ID=$(aws ec2 describe-import-snapshot-tasks --import-task-ids "$TASK_ID" --output json | jq -r '.ImportSnapshotTasks[].SnapshotTaskDetail.SnapshotId')
-
-  echo "New snapshot is $SNAPSHOT_ID."
-
-  AMI_ID=$(aws ec2 register-image --architecture x86_64 --ena-support --name "$IMAGE_NAME" --description "A NixOS AMI: {{profile}}" --block-device-mappings "DeviceName=/dev/sda1,Ebs={SnapshotId=$SNAPSHOT_ID}" --root-device-name /dev/sda1 | jq .ImageId)
-
-  echo "AMI is registered: $AMI_ID"
-
-  echo "Cleaning up image VHD from bucket"
-  aws s3 rm "s3://{{ami_bucket}}/$IMAGE_NAME.vhd"
+  bash ./ci-build-publish.sh "{{ami_bucket}}" "{{profile}}"
diff --git a/modules/mixins/alloy/config.alloy b/modules/mixins/alloy/config.alloy
index cc7c5ee..e79c2ba 100644
--- a/modules/mixins/alloy/config.alloy
+++ b/modules/mixins/alloy/config.alloy
@@ -1,5 +1,5 @@
-# We ship everything over OTLP
-otelcol.exporter.otlp "gc-fwd" {
+// We ship everything over OTLP
+otelcol.exporter.otlp "gc_fwd" {
   client {
     endpoint = "gc-fwd.altf4.internal:4317"
     tls {
@@ -8,21 +8,26 @@ otelcol.exporter.otlp "gc-fwd" {
   }
 }
 
-# Convert Prometheus data for OTLP
+// Convert Prometheus data for OTLP
 otelcol.receiver.prometheus "default" {
   output {
-    metrics = [otelcol.exporter.otlp.gc-fwd.input]
+    metrics = [otelcol.exporter.otlp.gc_fwd.input]
   }
 }
 
-# Convert Loki data for OTLP
+// Convert OTLP data for Prometheus
+otelcol.exporter.prometheus "default" {
+  forward_to = [prometheus.relabel.instance.receiver]
+}
+
+// Convert Loki data for OTLP
 otelcol.receiver.loki "default" {
   output {
-    logs = [otelcol.exporter.otlp.gc-fwd.input]
+    logs = [otelcol.exporter.otlp.gc_fwd.input]
   }
 }
 
-# Extract Systemd unit from journal entry
+// Extract Systemd unit from journal entry
 loki.relabel "journal" {
   forward_to = []
 
@@ -30,16 +35,31 @@ loki.relabel "journal" {
     source_labels = ["__journal__systemd_unit"]
     target_label = "unit"
   }
+
+  rule {
+    source_labels = ["__journal_container_name"]
+    target_label = "container_name"
+  }
+
+  rule {
+    source_labels = ["__journal_image_name"]
+    target_label = "container_image"
+  }
+
+  rule {
+    source_labels = ["__journal_container_id"]
+    target_label = "container_id"
+  }
 }
 
-# Fetch journal entries
+// Fetch journal entries
 loki.source.journal "journal" {
   forward_to = [otelcol.receiver.loki.default.receiver]
   relabel_rules = loki.relabel.journal.rules
   labels = {component = "loki.source.journal"}
 }
 
-# Set instance label to the hostname
+// Set instance label to the hostname
 prometheus.relabel "instance" {
   forward_to = [otelcol.receiver.prometheus.default.receiver]
   rule {
@@ -48,7 +68,7 @@ prometheus.relabel "instance" {
   }
 }
 
-# Export system metrics
+// Export system metrics
 prometheus.exporter.unix "host" {
   procfs_path = "/host/proc"
   sysfs_path = "/host/sys"
@@ -60,7 +80,7 @@ prometheus.exporter.unix "host" {
   }
 }
 
-# Scrape system metrics
+// Scrape system metrics
 prometheus.scrape "host" {
   targets = prometheus.exporter.unix.host.targets
   forward_to = [prometheus.relabel.instance.receiver]
diff --git a/modules/mixins/alloy/default.nix b/modules/mixins/alloy/default.nix
index b19c898..16aa45b 100644
--- a/modules/mixins/alloy/default.nix
+++ b/modules/mixins/alloy/default.nix
@@ -1,4 +1,4 @@
-{config, ...}: {
+{...}: {
   # see TODO further down
   imports = [../docker];
diff --git a/modules/mixins/docker/config.alloy b/modules/mixins/docker/config.alloy
new file mode 100644
index 0000000..2c8ed66
--- /dev/null
+++ b/modules/mixins/docker/config.alloy
@@ -0,0 +1,12 @@
+prometheus.exporter.cadvisor "cadvisor" {
+  docker_host = "unix:///var/run/docker.sock"
+  storage_duration = "5m"
+}
+
+prometheus.scrape "cadvisor" {
+  targets = prometheus.exporter.cadvisor.cadvisor.targets
+  forward_to = [prometheus.relabel.instance.receiver]
+  scrape_interval = "30s"
+}
+
+// vim:ft=hcl
diff --git a/modules/mixins/docker/default.nix b/modules/mixins/docker/default.nix
index db1a85e..58c8103 100644
--- a/modules/mixins/docker/default.nix
+++ b/modules/mixins/docker/default.nix
@@ -1,4 +1,11 @@
 {...}: {
   virtualisation.docker.enable = true;
   virtualisation.oci-containers.backend = "docker";
+
+  # Monitoring
+  environment.etc."alloy/docker.alloy" = {
+    source = ./config.alloy;
+    mode = "0440";
+    user = "root";
+  };
 }
diff --git a/modules/mixins/ecs-agent/config.alloy b/modules/mixins/ecs-agent/config.alloy
index c5f81fc..ede3c60 100644
--- a/modules/mixins/ecs-agent/config.alloy
+++ b/modules/mixins/ecs-agent/config.alloy
@@ -1,15 +1,4 @@
-prometheus.exporter.cadvisor "cadvisor" {
-  docker_host = "unix:///var/run/docker.sock"
-  storage_duration = "5m"
-}
-
-prometheus.scrape "cadvisor" {
-  targets = prometheus.exporter.cadvisor.cadvisor.targets
-  forward_to = [prometheus.relabel.instance.receiver]
-  scrape_interval = "30s"
-}
-
-prometheus.scrape "ecs-agent" {
+prometheus.scrape "ecs_agent" {
   targets = [
     {"__address__" = "127.0.0.1:51680", instance = env("HOSTNAME")},
   ]
@@ -24,9 +13,9 @@ otelcol.receiver.otlp "otlp" {
   http {}
 
   output {
-    metrics = [prometheus.relabel.instance.receiver]
-    logs = [otelcol.receiver.loki.default.receiver]
-    traces = [otelcol.exporter.otlp.gc-fwd.input]
+    metrics = [otelcol.exporter.prometheus.default.input]
+    logs = [otelcol.exporter.otlp.gc_fwd.input]
+    traces = [otelcol.exporter.otlp.gc_fwd.input]
   }
 }
 
diff --git a/modules/mixins/ecs-agent/default.nix b/modules/mixins/ecs-agent/default.nix
index 954736b..16fd883 100644
--- a/modules/mixins/ecs-agent/default.nix
+++ b/modules/mixins/ecs-agent/default.nix
@@ -1,4 +1,4 @@
-{pkgs, ...}: {
+{...}: {
   imports = [
     ../docker
     ../alloy
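
For reference, the new publish flow can also be exercised by hand outside GitHub Actions, since the justfile recipe is now a thin wrapper around the script. A minimal sketch, assuming AWS credentials plus the nix/just/jq toolchain are available; "my-ami-bucket" and "vm" below are placeholder bucket and profile names, not values taken from this patch:

    # CI defaults to false outside Actions, so group()/ciout()/cisum()
    # print plain "> ..." progress lines instead of ::group:: markers
    # and skip $GITHUB_OUTPUT / $GITHUB_STEP_SUMMARY entirely.
    CI=false bash ./ci-build-publish.sh "my-ami-bucket" "vm"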