feat(ci): Add Vector workload checks #18569

Merged: 15 commits, Sep 27, 2023
3 changes: 2 additions & 1 deletion .github/actions/spelling/allow.txt
@@ -45,9 +45,10 @@ Comcast
Consolas
Coolpad
DEBHELPER
DOOV
Danew
dkr
Dockerfiles
DOOV
Douban
Enot
Evercoss
209 changes: 209 additions & 0 deletions .github/workflows/workload_checks.yml
@@ -0,0 +1,209 @@
# Workload Checks Suite
#
# Runs Vector Workload Checks.
#
# Runs on:
# - scheduled daily UTC midnight

# This workflow runs our collection of workload checks against the latest Vector nightly image;
# which nightly that is depends on when the workflow is invoked.
#
# The goal is to establish a baseline of check results for a variety of cases
# and visualize trends for important Vector use cases.

name: Workload Checks Suite

on:
workflow_call:
workflow_dispatch:
schedule:
- cron: '0 0 * * *'
pull_request: #TODO delete this before merging
types:
- opened
- synchronize
env:
SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }}

jobs:
compute-metadata:
name: Compute metadata
runs-on: ubuntu-22.04
outputs:
replicas: ${{ steps.experimental-meta.outputs.REPLICAS }}
warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }}
total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }}
smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }}
lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }}

target-sha: ${{ steps.git-metadata.outputs.TARGET_SHA }}

steps:
- uses: actions/checkout@v3
with:
fetch-depth: 1000

- name: Get git metadata
id: git-metadata
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
export TARGET_SHA=$(git rev-parse HEAD)
echo "TARGET_SHA=${TARGET_SHA}" >> $GITHUB_OUTPUT

echo "target sha is: ${TARGET_SHA}"

if [ "${TARGET_SHA}" = "" ] ; then
echo "TARGET_SHA not found, exiting."
exit 1
fi

- name: Setup experimental metadata
id: experimental-meta
run: |
export WARMUP_SECONDS="45"
export REPLICAS="10"
export TOTAL_SAMPLES="600"
export SMP_CRATE_VERSION="0.10.0"
export LADING_VERSION="0.18.0"

echo "warmup seconds: ${WARMUP_SECONDS}"
echo "replicas: ${REPLICAS}"
echo "total samples: ${TOTAL_SAMPLES}"
echo "smp crate version: ${SMP_CRATE_VERSION}"
echo "lading version: ${LADING_VERSION}"

echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT
echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT
echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT
echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT
echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT

submit-job:
name: Submit workload checks job
runs-on: ubuntu-22.04
needs:
- compute-metadata
steps:
- name: Check status, in-progress
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='pending' \
-f description='Experiments submitted to the Workload Checks cluster.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

- uses: actions/checkout@v3

- name: Configure AWS Credentials
uses: aws-actions/[email protected]
with:
aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }}
aws-region: us-west-2

- name: Login to Amazon ECR
id: login-ecr
uses: aws-actions/amazon-ecr-login@v1

- name: Download SMP binary
run: |
aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp

- name: Submit job
env:
RUST_LOG: info
run: |
CURRENT_DATE=$(date --utc '+%Y_%m_%d')
RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error"

chmod +x ${{ runner.temp }}/bin/smp
RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \
--team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \
job submit-workload \
--lading-version ${{ needs.compute-metadata.outputs.lading-version }} \
--total-samples ${{ needs.compute-metadata.outputs.total-samples }} \
--warmup-seconds ${{ needs.compute-metadata.outputs.warmup-seconds }} \
--replicas ${{ needs.compute-metadata.outputs.replicas }} \
--target-image timberio/vector:nightly-debian \
--target-sha ${{ needs.compute-metadata.outputs.target-sha }} \
--target-config-dir ${{ github.workspace }}/workload-checks \
--target-name vector \
--target-command "/usr/bin/vector" \
--target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \
--tags smp_status=nightly,client_team="vector",tag_date="${CURRENT_DATE}" \
--submission-metadata ${{ runner.temp }}/submission-metadata

- uses: actions/upload-artifact@v3
with:
name: vector-submission-metadata
path: ${{ runner.temp }}/submission-metadata

- name: Await job
timeout-minutes: 120
env:
RUST_LOG: info
run: |
chmod +x ${{ runner.temp }}/bin/smp

${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \
job status \
--wait \
--wait-delay-seconds 60 \
--wait-timeout-minutes 90 \
--submission-metadata ${{ runner.temp }}/submission-metadata

- name: Handle cancellation if necessary
if: ${{ cancelled() }}
env:
RUST_LOG: info
run: |
chmod +x ${{ runner.temp }}/bin/smp
${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \
--submission-metadata ${{ runner.temp }}/submission-metadata

- name: Check status, cancelled
if: ${{ cancelled() }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='failure' \
-f description='Experiments submitted to the Workload Checks cluster were cancelled.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

- name: Check status, success
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='success' \
-f description='Experiments submitted to the Workload Checks cluster successfully.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

- name: Check status, failure
if: ${{ failure() }}
env:
GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
gh api \
--method POST \
-H "Accept: application/vnd.github+json" \
/repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
-f state='failure' \
-f description='Experiments submitted to the Workload Checks cluster failed.' \
-f context='Workload Checks Suite / submission' \
-f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
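The `compute-metadata` job above passes values between jobs by appending `KEY=VALUE` lines to the file that `$GITHUB_OUTPUT` points at. A minimal self-contained sketch of that mechanism (a temp file stands in for the runner-provided output file, which is an assumption made only so the snippet runs outside Actions):

```shell
# Sketch of the GITHUB_OUTPUT mechanism used by the compute-metadata job.
# On a real runner, GITHUB_OUTPUT is set by GitHub Actions; here we
# substitute a temp file so the snippet is self-contained.
GITHUB_OUTPUT=$(mktemp)

# Each KEY=VALUE line appended to the file becomes a step output,
# readable later as ${{ steps.<step-id>.outputs.KEY }}.
echo "REPLICAS=10" >> "$GITHUB_OUTPUT"
echo "WARMUP_SECONDS=45" >> "$GITHUB_OUTPUT"

cat "$GITHUB_OUTPUT"
```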
9 changes: 9 additions & 0 deletions workload-checks/README.md
@@ -0,0 +1,9 @@
# Workload Checks

The `smp` tool performs a nightly run of 'checks' to determine whether Vector is fit for purpose.
These checks help us answer questions about CPU usage, memory consumption, throughput, and so on.
By consistently running them, we build up a historical dataset [here](https://app.datadoghq.com/dashboard/wj9-9ds-q49?refresh_mode=sliding&from_ts=1694089061369&to_ts=1694693861369&live=true).

## Adding an Experiment

You can read more about the workload requirements [here](https://github.com/DataDog/datadog-agent/blob/main/test/workload-checks/README.md).
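For orientation, the files added in this PR suggest a layout like the following for an experiment (sketched from the diff; the case directory name is illustrative, since only `workload-checks/README.md` and `workload-checks/typical/machine.yaml` paths appear explicitly above):

```
workload-checks/
├── README.md
└── typical/
    ├── machine.yaml                 # machine profile (cpu, memory)
    └── cases/
        └── http_text_to_http_json/  # illustrative case name
            ├── README.md            # purpose of the experiment
            ├── experiment.yaml      # description, checks, bounds
            ├── lading/
            │   └── lading.yaml      # load generator + blackhole config
            └── vector/
                └── vector.toml      # Vector config under test
```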
@@ -0,0 +1,5 @@
# HTTP Text To HTTP JSON

## Purpose

Simulates a simple Vector use case with one HTTP server source and one HTTP sink. This was added as a proof of concept for the SMP workload checks.
@@ -0,0 +1,22 @@
description: >
Simulates a simple Vector use case with one HTTP server source and one HTTP sink.
This was added as a proof of concept for the SMP workload checks.
teams: []

labels: {}

checks:
- name: memory_usage
description: "Memory usage"
bounds:
series: rss_bytes
# The machine has 12Gb free.
upper_bound: 3.5Gb

- name: cpu_utilization
description: "CPU utilization"
bounds:
series: cpu_percentage
# The machine has 8 cores available.
lower_bound: 400
upper_bound: 800

> **Reviewer (Contributor):** I'd argue for the lower bound being 0 and the upper bound being something less than all CPUs. We do want the app server this Vector is running alongside to have some CPU to use.
>
> **Author:** Good point! This specific experiment is expected to be CPU-heavy. I will tweak it accordingly in order for these bounds to be more useful.
@@ -0,0 +1,16 @@
generator:
- http:
seed: [2, 3, 5, 7, 11, 13, 17, 19, 23, 29, 31, 37, 41, 43, 47, 53,
59, 61, 67, 71, 73, 79, 83, 89, 97, 101, 103, 107, 109, 113, 127, 131]
headers: {}
target_uri: "http://localhost:8282/"
bytes_per_second: "500 Mb"
parallel_connections: 10
method:
post:
maximum_prebuild_cache_size_bytes: "256 Mb"
variant: "apache_common"

blackhole:
- http:
binding_addr: "0.0.0.0:8080"
@@ -0,0 +1,12 @@
data_dir = "/var/lib/vector"

[sources.logs]
type = "http_server"
address = "0.0.0.0:8282"
decoding.codec = "bytes"

[sinks.http_sink]
type = "http"
uri = "http://localhost:8080"
inputs = ["logs"]
encoding.codec = "json"
7 changes: 7 additions & 0 deletions workload-checks/typical/machine.yaml
@@ -0,0 +1,7 @@
description: >
An ‘average’ customer server on which the agent runs alongside user
software. This is equivalent to an AWS c5.2xlarge with 4Gb of system memory
held back for system processes.
name: typical
cpu: 8
memory: 12Gb
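The check bounds in `experiment.yaml` follow from this machine profile: the diff's comments state the machine has 8 cores and 12Gb, and `cpu_percentage` counts 100 per fully busy core, so 8 cores give a ceiling of 800, while the 3.5Gb `rss_bytes` bound leaves most of the 12Gb for other processes. A small sketch making that arithmetic explicit (integer tenths of a Gb are used only to avoid floating point in shell):

```shell
# Relate machine.yaml (8 cores, 12Gb) to the bounds in experiment.yaml.
CORES=8
MEMORY_GB=12

# cpu_percentage counts 100 per fully busy core, so the hard ceiling is:
CPU_CEILING=$((CORES * 100))
echo "cpu ceiling: ${CPU_CEILING}"   # matches upper_bound: 800

# the 3.5Gb rss_bytes bound leaves headroom out of 12Gb
# (computed in tenths of a Gb to stay in integer arithmetic):
HEADROOM_TENTHS=$((MEMORY_GB * 10 - 35))
echo "memory headroom: ${HEADROOM_TENTHS} tenths of a Gb"
```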