# Source: pull request "[pull] master from Significant-Gravitas:master" (#132).
# Note: GitHub flagged this file as containing bidirectional Unicode text that may be
# interpreted or compiled differently than what appears below. To review, open the
# file in an editor that reveals hidden Unicode characters.
# CI workflow for the classic AGBenchmark package.
name: Classic - AGBenchmark CI

# Triggered by pushes and pull requests whose changed files match the path
# filters below: the benchmark sources (the negated pattern excludes
# classic/benchmark/reports/) or this workflow file itself.
on:
  push:
    branches: [master, dev, ci-test*]
    paths:
      - 'classic/benchmark/**'
      - '!classic/benchmark/reports/**'
      - '.github/workflows/classic-benchmark-ci.yml'
  pull_request:
    branches: [master, dev, release-*]
    paths:
      - 'classic/benchmark/**'
      - '!classic/benchmark/reports/**'
      - '.github/workflows/classic-benchmark-ci.yml'
# Concurrency group key: "benchmark-ci-<event name>-<PR number>" when
# github.head_ref is set, otherwise "benchmark-ci-<commit SHA>".
concurrency:
  group: ${{ format('benchmark-ci-{0}', github.head_ref && format('{0}-{1}', github.event_name, github.event.pull_request.number) || github.sha) }}
  # In-progress runs are cancelled only for events whose name starts with "pull_request".
  cancel-in-progress: ${{ startsWith(github.event_name, 'pull_request') }}

# Default shell for `run` steps.
defaults:
  run:
    shell: bash

env:
  # Python version installed by the self-test-with-agent job.
  # Quoted so it stays the string '3.10' instead of the float 3.1.
  min-python-version: '3.10'
jobs:
  # Runs the agbenchmark pytest suite with coverage on each platform in the
  # matrix and uploads the coverage report to Codecov.
  test:
    permissions:
      contents: read
    timeout-minutes: 30
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.10"]
        platform-os: [ubuntu, macos, macos-arm64, windows]
    # 'macos-arm64' maps to the macos-14 image; every other entry maps to '<platform-os>-latest'.
    runs-on: ${{ matrix.platform-os != 'macos-arm64' && format('{0}-latest', matrix.platform-os) || 'macos-14' }}
    defaults:
      run:
        shell: bash
        working-directory: classic/benchmark
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: true

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Set up Python dependency cache
        # On Windows, unpacking cached dependencies takes longer than just installing them
        if: runner.os != 'Windows'
        uses: actions/cache@v4
        with:
          path: ${{ runner.os == 'macOS' && '~/Library/Caches/pypoetry' || '~/.cache/pypoetry' }}
          # runner.arch is part of the key so that runners with the same
          # runner.os but a different CPU architecture (the matrix has both
          # 'macos' and 'macos-arm64') never share a cache entry.
          key: poetry-${{ runner.os }}-${{ runner.arch }}-${{ hashFiles('classic/benchmark/poetry.lock') }}

      - name: Install Poetry (Unix)
        if: runner.os != 'Windows'
        run: |
          curl -sSL https://install.python-poetry.org | python3 -

          # On macOS, expose the install location to this step and to later steps.
          if [ "${{ runner.os }}" = "macOS" ]; then
            PATH="$HOME/.local/bin:$PATH"
            echo "$HOME/.local/bin" >> "$GITHUB_PATH"
          fi

      - name: Install Poetry (Windows)
        if: runner.os == 'Windows'
        shell: pwsh
        run: |
          (Invoke-WebRequest -Uri https://install.python-poetry.org -UseBasicParsing).Content | python -

          # Expose the install location to this step and to later steps.
          $env:PATH += ";$env:APPDATA\Python\Scripts"
          echo "$env:APPDATA\Python\Scripts" >> $env:GITHUB_PATH

      - name: Install Python dependencies
        run: poetry install

      - name: Run pytest with coverage
        run: |
          poetry run pytest -vv \
            --cov=agbenchmark --cov-branch --cov-report term-missing --cov-report xml \
            --durations=10 \
            tests
        env:
          CI: true
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}

      - name: Upload coverage reports to Codecov
        uses: codecov/codecov-action@v4
        with:
          token: ${{ secrets.CODECOV_TOKEN }}
          flags: agbenchmark,${{ runner.os }}

  # Starts each agent in the matrix and runs agbenchmark against it in mock mode.
  self-test-with-agent:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        agent-name: [forge]
      fail-fast: false
    timeout-minutes: 20
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          submodules: true

      - name: Set up Python ${{ env.min-python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.min-python-version }}

      - name: Install Poetry
        run: |
          curl -sSL https://install.python-poetry.org | python -

      - name: Run regression tests
        working-directory: classic
        run: |
          ./run agent start ${{ matrix.agent-name }}
          cd ${{ matrix.agent-name }}

          set +e # Ignore non-zero exit codes and continue execution
          echo "Running the following command: poetry run agbenchmark --maintain --mock"
          poetry run agbenchmark --maintain --mock
          EXIT_CODE=$?
          set -e  # Stop ignoring non-zero exit codes
          # Exit code 5 is reported with a message; the script then continues.
          # NOTE(review): any other non-zero exit code of the --maintain run is
          # also ignored here - confirm that this is intended.
          if [ "$EXIT_CODE" -eq 5 ]; then
            echo "regression_tests.json is empty."
          fi

          echo "Running the following command: poetry run agbenchmark --mock"
          poetry run agbenchmark --mock

          echo "Running the following command: poetry run agbenchmark --mock --category=data"
          poetry run agbenchmark --mock --category=data

          echo "Running the following command: poetry run agbenchmark --mock --category=coding"
          poetry run agbenchmark --mock --category=coding

          # echo "Running the following command: poetry run agbenchmark --test=WriteFile"
          # poetry run agbenchmark --test=WriteFile
          cd ../benchmark
          poetry install
          echo "Adding the BUILD_SKILL_TREE environment variable. This will attempt to add new elements in the skill tree. If new elements are added, the CI fails because they should have been pushed"
          export BUILD_SKILL_TREE=true

          # The skill-tree check below is currently disabled.
          # poetry run agbenchmark --mock

          # CHANGED=$(git diff --name-only | grep -E '(agbenchmark/challenges)|(../classic/frontend/assets)') || echo "No diffs"
          # if [ ! -z "$CHANGED" ]; then
          #   echo "There are unstaged changes please run agbenchmark and commit those changes since they are needed."
          #   echo "$CHANGED"
          #   exit 1
          # else
          #   echo "No unstaged changes."
          # fi
        env:
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          TELEMETRY_ENVIRONMENT: autogpt-benchmark-ci
          TELEMETRY_OPT_IN: ${{ github.ref_name == 'master' }}