-
Notifications
You must be signed in to change notification settings - Fork 1.6k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
7 changed files
with
422 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,352 @@ | ||
# Workload Checks | ||
# | ||
# Runs Vector Workload Checks. | ||
# | ||
# Runs on: | ||
# - scheduled daily at midnight UTC | ||
# - manual dispatch (workflow_dispatch) or a call from another workflow (workflow_call) | ||
|
||
# This workflow runs the collection of our workload checks, using the repo HEAD SHA, | ||
# which depends on when the workflow is invoked. | ||
# | ||
# The goal is to establish a baseline of check results for a variety of cases | ||
# and visualize trends for important Vector use cases. | ||
# | ||
# The HEAD SHA is also used to tag the Vector Docker image. | ||
|
||
name: Workload Checks

# Triggers: callable from other workflows, runnable by hand, and scheduled.
on:
  workflow_call:
  workflow_dispatch:
  schedule:
    # Daily at 00:00 (GitHub evaluates cron schedules in UTC).
    - cron: "0 0 * * *"

# Workflow-wide environment, populated from a repository secret.
env:
  SINGLE_MACHINE_PERFORMANCE_API: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_API }}
|
||
jobs: | ||
# Publishes the target commit SHA together with the fixed experiment and
# system parameters as job outputs, so that downstream jobs can read them via
# `needs.compute-metadata.outputs.*`.
#
# This is the entry job of the workflow, so it declares no `needs:` (the
# previous `needs: should-run` pointed at a job this workflow does not define).
compute-metadata:
  name: Compute metadata
  runs-on: ubuntu-22.04
  outputs:
    # Must reference the `git-metadata` step id below; any other id yields an
    # empty string and breaks every job that consumes `target-sha`.
    target-sha: ${{ steps.git-metadata.outputs.TARGET_SHA }}

    # below are used in the experiment/analyze jobs
    cpus: ${{ steps.system.outputs.CPUS }}
    memory: ${{ steps.system.outputs.MEMORY }}
    vector-cpus: ${{ steps.system.outputs.VECTOR_CPUS }}

    replicas: ${{ steps.experimental-meta.outputs.REPLICAS }}
    warmup-seconds: ${{ steps.experimental-meta.outputs.WARMUP_SECONDS }}
    total-samples: ${{ steps.experimental-meta.outputs.TOTAL_SAMPLES }}
    p-value: ${{ steps.experimental-meta.outputs.P_VALUE }}
    smp-version: ${{ steps.experimental-meta.outputs.SMP_CRATE_VERSION }}
    lading-version: ${{ steps.experimental-meta.outputs.LADING_VERSION }}

  steps:
    - uses: actions/checkout@v3
      with:
        fetch-depth: 1000

    # Resolves the SHA that the image is built from and tagged with.
    # NOTE(review): the file header says the repo HEAD SHA is used, while this
    # computes the merge-base with `master`; the two only coincide when the
    # run is on `master` itself - confirm which is intended.
    - name: Get git metadata
      id: git-metadata
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        export TARGET_SHA=$(git merge-base master HEAD)
        echo "TARGET_SHA=${TARGET_SHA}" >> $GITHUB_OUTPUT
        echo "target sha is: ${TARGET_SHA}"
        if [ "${TARGET_SHA}" = "" ] ; then
          echo "TARGET_SHA not found, exiting."
          exit 1
        fi

    # Fixed experiment parameters; echoed for the log, then exported as step
    # outputs.
    - name: Setup experimental metadata
      id: experimental-meta
      run: |
        export WARMUP_SECONDS="45"
        export REPLICAS="10"
        export TOTAL_SAMPLES="600"
        export P_VALUE="0.1"
        export SMP_CRATE_VERSION="0.10.0"
        export LADING_VERSION="0.18.0"
        echo "warmup seconds: ${WARMUP_SECONDS}"
        echo "replicas: ${REPLICAS}"
        echo "total samples: ${TOTAL_SAMPLES}"
        echo "regression p-value: ${P_VALUE}"
        echo "smp crate version: ${SMP_CRATE_VERSION}"
        echo "lading version: ${LADING_VERSION}"
        echo "WARMUP_SECONDS=${WARMUP_SECONDS}" >> $GITHUB_OUTPUT
        echo "REPLICAS=${REPLICAS}" >> $GITHUB_OUTPUT
        echo "TOTAL_SAMPLES=${TOTAL_SAMPLES}" >> $GITHUB_OUTPUT
        echo "P_VALUE=${P_VALUE}" >> $GITHUB_OUTPUT
        echo "SMP_CRATE_VERSION=${SMP_CRATE_VERSION}" >> $GITHUB_OUTPUT
        echo "LADING_VERSION=${LADING_VERSION}" >> $GITHUB_OUTPUT

    # Fixed resource figures for the experiment machine, exported as step
    # outputs.
    - name: Setup system details
      id: system
      run: |
        export CPUS="7"
        export MEMORY="30g"
        export VECTOR_CPUS="4"
        echo "cpus total: ${CPUS}"
        echo "memory total: ${MEMORY}"
        echo "vector cpus: ${VECTOR_CPUS}"
        echo "CPUS=${CPUS}" >> $GITHUB_OUTPUT
        echo "MEMORY=${MEMORY}" >> $GITHUB_OUTPUT
        echo "VECTOR_CPUS=${VECTOR_CPUS}" >> $GITHUB_OUTPUT
## | ||
## BUILD | ||
## | ||
|
||
# Builds the Vector container image at the target SHA and hands it to later
# jobs as a tarball artifact named `target-image`.
build-target:
  name: Build target Vector container
  runs-on: [linux, ubuntu-20.04-4core]
  needs:
    - compute-metadata
  steps:
    - uses: colpal/actions-clean@v1

    # The repository is checked out into `target-vector/`, not the workspace
    # root, so every path below has to be given relative to the workspace.
    - uses: actions/checkout@v3
      with:
        ref: ${{ needs.compute-metadata.outputs.target-sha }}
        path: target-vector

    # NOTE(review): the pinned version of this action was lost (the reference
    # was mangled by e-mail obfuscation); restore the exact `@vX.Y.Z` pin.
    - name: Set up Docker Buildx
      id: buildx
      uses: docker/setup-buildx-action@v2

    # NOTE(review): same as above - confirm the pinned action version.
    - name: Build 'vector' target image
      uses: docker/build-push-action@v4
      with:
        context: target-vector/
        cache-from: type=gha
        cache-to: type=gha,mode=max
        # `file` is resolved from the working directory rather than from
        # `context`, and the only checkout lives under `target-vector/`.
        file: target-vector/regression/Dockerfile
        builder: ${{ steps.buildx.outputs.name }}
        outputs: type=docker,dest=${{ runner.temp }}/target-image.tar
        tags: |
          vector:${{ needs.compute-metadata.outputs.target-sha }}

    - name: Upload image as artifact
      uses: actions/upload-artifact@v3
      with:
        name: target-image
        path: "${{ runner.temp }}/target-image.tar"
|
||
# Smoke test for the AWS credentials: configures them and performs one S3
# download of the SMP binary, so that bad credentials fail here rather than
# after the image has been built and pushed.
confirm-valid-credentials:
  name: Confirm AWS credentials are minimally valid
  runs-on: ubuntu-22.04
  needs:
    - compute-metadata
  steps:
    # NOTE(review): the pinned version of this action was lost (the reference
    # was mangled by e-mail obfuscation); restore the exact `@vX.Y.Z` pin.
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    - name: Download SMP binary
      run: |
        aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp
## | ||
## SUBMIT | ||
## | ||
|
||
# Loads the `target-image` artifact into the local Docker daemon, then tags it
# for the team's ECR repository and pushes it.
upload-target-image-to-ecr:
  name: Upload target images to ECR
  runs-on: ubuntu-22.04
  needs:
    - compute-metadata
    - confirm-valid-credentials
    - build-target
  steps:
    - name: 'Download target image'
      uses: actions/download-artifact@v3
      with:
        name: target-image

    - name: Load target image
      run: |
        docker load --input target-image.tar

    # NOTE(review): the pinned version of this action was lost (the reference
    # was mangled by e-mail obfuscation); restore the exact `@vX.Y.Z` pin.
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v1

    - name: Docker Login to ECR
      uses: docker/login-action@v2
      with:
        registry: ${{ steps.login-ecr.outputs.registry }}

    # The remote tag is `<registry>/<team id>-vector:<target sha>`; the local
    # tag `vector:<target sha>` is the one the build job assigned.
    - name: Tag & push target image
      run: |
        docker tag vector:${{ needs.compute-metadata.outputs.target-sha }} ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }}
        docker push ${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }}
# Submits the workload-checks job with the `smp` CLI, waits for it to finish,
# and mirrors the outcome onto the target commit as a commit status
# (pending -> success / failure).
submit-job:
  name: Submit workload checks job
  runs-on: ubuntu-22.04
  needs:
    - compute-metadata
    - upload-target-image-to-ecr
  steps:
    - name: Check status, in-progress
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
          -f state='pending' \
          -f description='Experiments submitted to the Regression Detection cluster.' \
          -f context='Regression Detection Suite / submission' \
          -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

    - uses: actions/checkout@v3

    # NOTE(review): the pinned version of this action was lost (the reference
    # was mangled by e-mail obfuscation); restore the exact `@vX.Y.Z` pin.
    - name: Configure AWS Credentials
      uses: aws-actions/configure-aws-credentials@v2
      with:
        aws-access-key-id: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_ACCESS_KEY_ID }}
        aws-secret-access-key: ${{ secrets.SINGLE_MACHINE_PERFORMANCE_BOT_SECRET_ACCESS_KEY }}
        aws-region: us-west-2

    - name: Login to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v1

    - name: Download SMP binary
      run: |
        aws s3 cp s3://smp-cli-releases/v${{ needs.compute-metadata.outputs.smp-version }}/x86_64-unknown-linux-gnu/smp ${{ runner.temp }}/bin/smp

    # The experiment parameters are outputs of the `compute-metadata` job and
    # do not exist as shell variables here, so they are passed in explicitly
    # through `env`.
    #
    # NOTE(review): the SSM parameter names (`ci.datadog-agent.*`) and the
    # `--target-name`, `--target-command`, `--target-config-dir` and
    # `client_team` values look carried over from the datadog-agent setup, and
    # the team id used here differs from the
    # `secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID` used by the await/cancel
    # steps and the image tag - confirm the values intended for Vector.
    - name: Submit job
      env:
        RUST_LOG: info
        SMP_VERSION: ${{ needs.compute-metadata.outputs.smp-version }}
        LADING_VERSION: ${{ needs.compute-metadata.outputs.lading-version }}
        TOTAL_SAMPLES: ${{ needs.compute-metadata.outputs.total-samples }}
        WARMUP_SECONDS: ${{ needs.compute-metadata.outputs.warmup-seconds }}
        REPLICAS: ${{ needs.compute-metadata.outputs.replicas }}
        TARGET_SHA: ${{ needs.compute-metadata.outputs.target-sha }}
      run: |
        git fetch origin
        # Setup AWS credentials for single-machine-performance AWS account
        AWS_NAMED_PROFILE="single-machine-performance"
        SMP_ACCOUNT_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-account-id --with-decryption --query "Parameter.Value" --out text)
        SMP_ECR_URL=${SMP_ACCOUNT_ID}.dkr.ecr.us-west-2.amazonaws.com
        SMP_AGENT_TEAM_ID=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-agent-team-id --with-decryption --query "Parameter.Value" --out text)
        SMP_API=$(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-api --with-decryption --query "Parameter.Value" --out text)
        aws configure set aws_access_key_id $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key-id --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE}
        aws configure set aws_secret_access_key $(aws ssm get-parameter --region us-east-1 --name ci.datadog-agent.single-machine-performance-bot-access-key --with-decryption --query "Parameter.Value" --out text) --profile ${AWS_NAMED_PROFILE}
        aws configure set region us-west-2 --profile ${AWS_NAMED_PROFILE}
        # Download smp binary and prepare it for use
        aws --profile single-machine-performance s3 cp s3://smp-cli-releases/v${SMP_VERSION}/x86_64-unknown-linux-gnu/smp smp
        chmod +x smp
        # No whitespace around `=`: otherwise the shell runs `TARGET_IMAGE` as a command.
        TARGET_IMAGE=${{ steps.login-ecr.outputs.registry }}/${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }}-vector:${{ needs.compute-metadata.outputs.target-sha }}
        CURRENT_DATE=$(date --utc '+%Y_%m_%d')
        RUST_LOG="info,aws_config::profile::credentials=error"
        RUST_LOG_DEBUG="debug,aws_config::profile::credentials=error"
        chmod +x ${{ runner.temp }}/bin/smp
        RUST_BACKTRACE=1 RUST_LOG="${RUST_LOG_DEBUG}" ${{ runner.temp }}/bin/smp \
          --team-id ${SMP_AGENT_TEAM_ID} --api-base ${SMP_API} --aws-named-profile ${AWS_NAMED_PROFILE} \
          job submit-workload \
          --lading-version ${LADING_VERSION} \
          --total-samples ${TOTAL_SAMPLES} \
          --warmup-seconds ${WARMUP_SECONDS} \
          --replicas ${REPLICAS} \
          --target-image ${TARGET_IMAGE} \
          --target-sha ${TARGET_SHA} \
          --target-config-dir test/workload-checks \
          --target-name datadog-agent \
          --target-command "/bin/entrypoint.sh" \
          --target-environment-variables "DD_HOSTNAME=smp-workload-checks,DD_DD_URL=http://127.0.0.1:9092,DD_API_KEY=00000001" \
          --tags smp_status=nightly,client_team="agent",tag_date="${CURRENT_DATE}" \
          --submission-metadata ${{ runner.temp }}/submission-metadata

    # Reads the same file that the submit step writes and that the
    # await/cancel steps below consume.
    - uses: actions/upload-artifact@v3
      with:
        name: vector-submission-metadata
        path: ${{ runner.temp }}/submission-metadata

    - name: Await job
      timeout-minutes: 120
      env:
        RUST_LOG: info
      run: |
        chmod +x ${{ runner.temp }}/bin/smp
        ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} \
          job status \
          --wait \
          --wait-delay-seconds 60 \
          --wait-timeout-minutes 90 \
          --submission-metadata ${{ runner.temp }}/submission-metadata

    - name: Handle cancellation if necessary
      if: ${{ cancelled() }}
      env:
        RUST_LOG: info
      run: |
        chmod +x ${{ runner.temp }}/bin/smp
        ${{ runner.temp }}/bin/smp --team-id ${{ secrets.SINGLE_MACHINE_PERFORMANCE_TEAM_ID }} job cancel \
          --submission-metadata ${{ runner.temp }}/submission-metadata

    - name: Check status, cancelled
      if: ${{ cancelled() }}
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
          -f state='failure' \
          -f description='Experiments submitted to the Regression Detection cluster cancelled.' \
          -f context='Regression Detection Suite / submission' \
          -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

    # No `if:` here, so the implicit `success()` applies: this only runs when
    # every earlier step succeeded.
    - name: Check status, success
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
          -f state='success' \
          -f description='Experiments submitted to the Regression Detection cluster successfully.' \
          -f context='Regression Detection Suite / submission' \
          -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}

    # Reports `failure`; posting `success` here would mark a failed run green.
    - name: Check status, failure
      if: ${{ failure() }}
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
      run: |
        gh api \
          --method POST \
          -H "Accept: application/vnd.github+json" \
          /repos/${{ github.repository }}/statuses/${{ needs.compute-metadata.outputs.target-sha }} \
          -f state='failure' \
          -f description='Experiments submitted to the Regression Detection Suite failed.' \
          -f context='Regression Detection Suite / submission' \
          -f target_url=${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# Workload Checks | ||
|
||
The `smp` tool performs a nightly run of 'checks' to determine if Vector is fit for purpose. | ||
The 'checks' can help us answer questions about CPU usage, memory consumption, throughput, etc. | ||
By consistently running these checks we establish a historical dataset [here](https://app.datadoghq.com/dashboard/wj9-9ds-q49?refresh_mode=sliding&from_ts=1694089061369&to_ts=1694693861369&live=true). | ||
|
||
## Adding an Experiment | ||
|
||
You can read more about the workload requirements [here](https://github.com/DataDog/datadog-agent/blob/main/test/workload-checks/README.md). |
5 changes: 5 additions & 0 deletions
5
workload-checks/typical/cases/http_text_to_http_json/README.md
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
# HTTP Text To HTTP JSON | ||
|
||
## Purpose | ||
|
||
Simulates a simple Vector use case with one HTTP server source and one HTTP sink. This was added as a proof of concept for the SMP workload checks. |
21 changes: 21 additions & 0 deletions
21
workload-checks/typical/cases/http_text_to_http_json/experiment.yaml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Experiment definition for the `http_text_to_http_json` workload-check case.
description: >
  Simulates a simple Vector use with one HTTP server source and one HTTP sink.
  This was added as a proof of concept for the SMP workload checks.

teams: []

labels: {}

# Each check puts an upper bound on one measured series.
checks:
  - name: memory_usage
    description: 'Memory usage'
    bounds:
      series: rss_bytes
      # The machine has 12Gb free.
      upper_bound: 3.5Gb

  - name: cpu_utilization
    description: 'CPU utilization'
    bounds:
      series: cpu_percentage
      # The machine has 8 cores available.
      upper_bound: 400
Oops, something went wrong.