From b52e28b2aedb72fe389f40eedf7ab4a4784505b7 Mon Sep 17 00:00:00 2001 From: Viet Nguyen Duc Date: Tue, 3 Dec 2024 10:09:55 +0700 Subject: [PATCH] K8s: Add test results for autoscaling Signed-off-by: Viet Nguyen Duc --- .github/workflows/k8s-scaling-test.yml | 215 +++++++++++++----- .keda/README.md | 17 +- ...s_test_k8s_autoscaling_deployment_count.md | 22 ++ ...s_autoscaling_deployment_count_in_chaos.md | 22 ++ ...deployment_count_with_node_max_sessions.md | 22 ++ ..._autoscaling_job_count_strategy_default.md | 22 ++ ...ing_job_count_strategy_default_in_chaos.md | 22 ++ ...strategy_default_with_node_max_sessions.md | 22 ++ .keda/scalers/selenium_grid_scaler.go | 102 ++++----- .keda/scalers/selenium_grid_scaler_test.go | 192 +++++++++------- Makefile | 32 ++- charts/selenium-grid/CONFIGURATION.md | 8 +- charts/selenium-grid/values.yaml | 7 +- tests/AutoscalingTests/__init__.py | 0 tests/AutoscalingTests/common.py | 95 ++++++++ tests/AutoscalingTests/test_scale_chaos.py | 62 +++++ tests/AutoscalingTests/test_scale_up.py | 65 ++++++ tests/README.md | 18 ++ tests/bootstrap.sh | 18 +- .../ci/DeploymentAutoscaling-values.yaml | 4 +- tests/charts/ci/JobAutoscaling-values.yaml | 2 + tests/charts/ci/base-recorder-values.yaml | 2 +- tests/charts/make/chart_test.sh | 24 +- tests/requirements.txt | 5 + 24 files changed, 779 insertions(+), 221 deletions(-) create mode 100644 .keda/results_test_k8s_autoscaling_deployment_count.md create mode 100644 .keda/results_test_k8s_autoscaling_deployment_count_in_chaos.md create mode 100644 .keda/results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md create mode 100644 .keda/results_test_k8s_autoscaling_job_count_strategy_default.md create mode 100644 .keda/results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md create mode 100644 .keda/results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md create mode 100644 tests/AutoscalingTests/__init__.py create mode 100644 
tests/AutoscalingTests/common.py create mode 100644 tests/AutoscalingTests/test_scale_chaos.py create mode 100644 tests/AutoscalingTests/test_scale_up.py create mode 100644 tests/README.md create mode 100644 tests/requirements.txt diff --git a/.github/workflows/k8s-scaling-test.yml b/.github/workflows/k8s-scaling-test.yml index 9453a8031c..3dcd661f89 100644 --- a/.github/workflows/k8s-scaling-test.yml +++ b/.github/workflows/k8s-scaling-test.yml @@ -1,4 +1,4 @@ -name: Test Autoscaling +name: Test Grid Autoscaling in Kubernetes on: workflow_call: @@ -8,20 +8,82 @@ on: required: false type: string default: 'false' + push-results: + description: 'Publish the results to the repository' + required: false + type: boolean + default: false + iteration: + description: 'Test a specific iteration' + required: false + type: string + default: '20' workflow_dispatch: + inputs: + publish-results: + description: 'Publish the results to the repository' + required: false + type: boolean + default: false + pr-results: + description: 'Create a PR with the results' + required: false + type: boolean + default: false + iteration: + description: 'Test a specific iteration' + required: false + type: string + default: '20' permissions: - contents: read + contents: write + pull-requests: write + +env: + RUN_ID: ${{ github.run_id }} + TEST_AUTOSCALING_ITERATIONS: ${{ github.event.inputs.iteration || '20' }} jobs: build-and-test: name: Test K8s - runs-on: blacksmith-16vcpu-ubuntu-2204 + runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - k8s-version: 'v1.31.2' + test-strategy: test_k8s_autoscaling_job_count_strategy_default_in_chaos + cluster: 'minikube' + helm-version: 'v3.16.3' + docker-version: '27.3.1' + python-version: '3.13' + - k8s-version: 'v1.31.2' + test-strategy: test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions + cluster: 'minikube' + helm-version: 'v3.16.3' + docker-version: '27.3.1' + python-version: '3.13' + - k8s-version: 'v1.31.2' + 
test-strategy: test_k8s_autoscaling_job_count_strategy_default + cluster: 'minikube' + helm-version: 'v3.16.3' + docker-version: '27.3.1' + python-version: '3.13' + - k8s-version: 'v1.31.2' + test-strategy: test_k8s_autoscaling_deployment_count_in_chaos + cluster: 'minikube' + helm-version: 'v3.16.3' + docker-version: '27.3.1' + python-version: '3.13' + - k8s-version: 'v1.31.2' + test-strategy: test_k8s_autoscaling_deployment_count_with_node_max_sessions + cluster: 'minikube' + helm-version: 'v3.16.3' + docker-version: '27.3.1' + python-version: '3.13' + - k8s-version: 'v1.31.2' + test-strategy: test_k8s_autoscaling_deployment_count cluster: 'minikube' helm-version: 'v3.16.3' docker-version: '27.3.1' @@ -53,8 +115,6 @@ jobs: with: python-version: ${{ matrix.python-version }} check-latest: true - - name: Verify chart configuration up-to-date - run: make lint_readme_charts - name: Get branch name (only for push to branch) if: github.event_name == 'push' run: echo "BRANCH=$(echo ${PUSH_BRANCH##*/})" >> $GITHUB_ENV @@ -79,79 +139,40 @@ jobs: echo "AUTHORS=${AUTHORS}" >> $GITHUB_ENV env: AUTHORS: ${{ vars.AUTHORS || 'SeleniumHQ' }} - - name: Build Helm charts - run: | - BUILD_DATE=${BUILD_DATE} make chart_build - echo "CHART_PACKAGE_PATH=$(cat /tmp/selenium_chart_version)" >> $GITHUB_ENV - echo "CHART_FILE_NAME=$(basename $(cat /tmp/selenium_chart_version))" >> $GITHUB_ENV - - name: Build Docker images - uses: nick-invision/retry@master - with: - timeout_minutes: 12 - max_attempts: 3 - retry_wait_seconds: 60 - command: NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} make build - name: Setup Kubernetes cluster uses: nick-invision/retry@master with: timeout_minutes: 10 max_attempts: 3 command: CLUSTER=${CLUSTER} SERVICE_MESH=${SERVICE_MESH} KUBERNETES_VERSION=${KUBERNETES_VERSION} NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} make chart_cluster_setup - - name: Test Selenium Grid on Kubernetes with Autoscaling - uses: 
nick-invision/retry@master - with: - timeout_minutes: 30 - max_attempts: 3 - command: | - NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_chaos - - name: Upload results - if: always() - uses: actions/upload-artifact@main - with: - name: chart_test_autoscaling_job_count_chaos - path: ./tests/tests/*.md - if-no-files-found: ignore - - name: Test Selenium Grid on Kubernetes with Autoscaling - uses: nick-invision/retry@master - with: - timeout_minutes: 30 - max_attempts: 3 - command: | - NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_max_sessions - - name: Upload results - if: always() - uses: actions/upload-artifact@main - with: - name: chart_test_autoscaling_job_count_max_sessions - path: ./tests/tests/*.md - if-no-files-found: ignore - - name: Test Selenium Grid on Kubernetes with Autoscaling + - name: Build Docker images uses: nick-invision/retry@master with: - timeout_minutes: 30 + timeout_minutes: 12 max_attempts: 3 - command: | - NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count_strategy_accurate - - name: Upload results - if: always() - uses: actions/upload-artifact@main - with: - name: chart_test_autoscaling_job_count_strategy_accurate - path: ./tests/tests/*.md - if-no-files-found: ignore + retry_wait_seconds: 60 + command: NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} make build + - name: Build Helm charts + run: | + BUILD_DATE=${BUILD_DATE} make chart_build + echo "CHART_PACKAGE_PATH=$(cat /tmp/selenium_chart_version)" >> $GITHUB_ENV + echo "CHART_FILE_NAME=$(basename $(cat /tmp/selenium_chart_version))" >> $GITHUB_ENV - name: Test Selenium Grid on Kubernetes with Autoscaling uses: nick-invision/retry@master with: timeout_minutes: 30 max_attempts: 3 command: | - NAME=${IMAGE_REGISTRY} 
VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false make chart_test_autoscaling_job_count + NAME=${IMAGE_REGISTRY} VERSION=${BRANCH} BUILD_DATE=${BUILD_DATE} TEST_UPGRADE_CHART=false TEST_AUTOSCALING_ITERATIONS=${TEST_AUTOSCALING_ITERATIONS} \ + make ${{ matrix.test-strategy }} + - name: Rename results + run: mv ./tests/tests/autoscaling_results.md ./tests/tests/results_${{ matrix.test-strategy }}.md - name: Upload results if: always() - uses: actions/upload-artifact@main + uses: actions/upload-artifact@v4 with: - name: chart_test_autoscaling_job_count - path: ./tests/tests/*.md + name: results_${{ matrix.test-strategy }} + path: ./tests/tests/results_${{ matrix.test-strategy }}.md if-no-files-found: ignore - name: Cleanup Kubernetes cluster if: always() @@ -159,3 +180,77 @@ jobs: - name: Clean up Docker if: always() run: docker system prune -af + + publish-results: + name: Publish Results + if: (!failure() && !cancelled() && (github.event.inputs.publish-results == 'true')) + runs-on: ubuntu-latest + needs: build-and-test + steps: + - name: Checkout code + uses: actions/checkout@main + with: + persist-credentials: false + fetch-depth: 0 + - name: Download results + uses: actions/download-artifact@v4 + with: + path: ./.keda + pattern: 'results_*' + merge-multiple: 'true' + run-id: ${{ env.RUN_ID }} + github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Commit files + run: | + git config --local user.email "selenium-ci@users.noreply.github.com" + git config --local user.name "Selenium CI Bot" + git add .keda/. 
+ git commit -m "[ci] Upload autoscaling in K8s test results [skip ci]" -a + - name: Push changes + uses: ad-m/github-push-action@master + with: + github_token: ${{ secrets.SELENIUM_CI_TOKEN }} + branch: ${{ env.BRANCH_NAME }} + env: + BRANCH_NAME: ${{ github.head_ref || github.ref_name }} + + + pr-results: + name: Create a PR with the results + if: (!failure() && !cancelled() && (github.event.inputs.pr-results == 'true')) + runs-on: ubuntu-latest + needs: build-and-test + steps: + - name: Checkout code + uses: actions/checkout@main + with: + persist-credentials: false + fetch-depth: 0 + - name: Download results + uses: actions/download-artifact@v4 + with: + path: ./.keda + pattern: 'results_*' + merge-multiple: 'true' + run-id: ${{ env.RUN_ID }} + github-token: ${{ secrets.GITHUB_TOKEN }} + - name: Commit configs + run: | + git config --local user.email "selenium-ci@users.noreply.github.com" + git config --local user.name "Selenium CI Bot" + - name: Create Pull Request + id: cpr + uses: peter-evans/create-pull-request@main + with: + token: ${{ secrets.SELENIUM_CI_TOKEN }} + commit-message: "[ci] Upload autoscaling in K8s test results" + title: "[ci] Upload autoscaling in K8s test results" + body: "This PR contains the results of the autoscaling tests in Kubernetes" + committer: 'Selenium CI Bot ' + author: 'Selenium CI Bot ' + branch: autoscaling-results + - name: Check outputs + if: ${{ steps.cpr.outputs.pull-request-number }} + run: | + echo "Pull Request Number - ${{ steps.cpr.outputs.pull-request-number }}" + echo "Pull Request URL - ${{ steps.cpr.outputs.pull-request-url }}" diff --git a/.keda/README.md b/.keda/README.md index 0448f2e296..abafe645e4 100644 --- a/.keda/README.md +++ b/.keda/README.md @@ -51,13 +51,24 @@ You can involve to review and discuss the pull requests to help us early detect - https://github.com/kedacore/keda/pull/6368 -~~- https://github.com/kedacore/keda/pull/6169 (merged, v2.16.0)~~ +- 
~~https://github.com/kedacore/keda/pull/6169 (merged, v2.16.0)~~ [kedacore/keda-docs](https://github.com/kedacore/keda-docs) -- https://github.com/kedacore/keda-docs/pull/1504 +- ~~https://github.com/kedacore/keda-docs/pull/1468 (merged, v2.16.0)~~ -~~- https://github.com/kedacore/keda-docs/pull/1468 (merged, v2.16.0)~~ +# Test results of the patch scaler + +There are tests for the patched scaler implementation. You can run the tests by following the steps in [../tests/README.md](../tests/README.md). + +Test results could be referred to + +- [results_test_k8s_autoscaling_job_count_strategy_default.md](./results_test_k8s_autoscaling_job_count_strategy_default.md) +- [results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md](./results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md) +- [results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md](./results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md) +- [results_test_k8s_autoscaling_deployment_count.md](./results_test_k8s_autoscaling_deployment_count.md) +- [results_test_k8s_autoscaling_deployment_count_in_chaos.md](./results_test_k8s_autoscaling_deployment_count_in_chaos.md) +- [results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md](./results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md) # Resources diff --git a/.keda/results_test_k8s_autoscaling_deployment_count.md b/.keda/results_test_k8s_autoscaling_deployment_count.md new file mode 100644 index 0000000000..4c572d841b --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_deployment_count.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | 
------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 1 | 39.44 s | 0 | 1 | 1 | 1 | 1 | 0 | 1 | +| 2 | 2 | 39.77 s | 0 | 1 | 2 | 2 | 1 | 0 | 0 | +| 3 | 1 | 28.38 s | 0 | 1 | 3 | 3 | 1 | 0 | 0 | +| 4 | 2 | 54.92 s | 0 | 2 | 5 | 5 | 1 | 0 | 0 | +| 5 | 2 | 33.04 s | 0 | 2 | 7 | 7 | 1 | 0 | 0 | +| 6 | 1 | 29.43 s | 0 | 1 | 8 | 8 | 1 | 0 | 8 | +| 7 | 2 | 47.04 s | 0 | 1 | 2 | 9 | 1 | 7 | 0 | +| 8 | 2 | 43.42 s | 0 | 1 | 4 | 10 | 1 | 6 | 0 | +| 9 | 2 | 47.99 s | 0 | 0 | 6 | 10 | 1 | 4 | 0 | +| 10 | 3 | 44.97 s | 0 | 2 | 9 | 12 | 1 | 3 | 0 | +| 11 | 3 | 42.98 s | 0 | -2 | 12 | 10 | 1 | -2 | 12 | +| 12 | 1 | 4.52 s | 0 | 0 | 1 | 10 | 1 | 9 | 0 | +| 13 | 2 | 39.93 s | 0 | 1 | 3 | 11 | 1 | 8 | 0 | +| 14 | 1 | 29.45 s | 0 | 1 | 4 | 12 | 1 | 8 | 0 | +| 15 | 2 | 30.52 s | 0 | 1 | 6 | 13 | 1 | 7 | 0 | +| 16 | 3 | 35.62 s | 0 | 3 | 9 | 16 | 1 | 7 | 9 | +| 17 | 3 | 16.59 s | 0 | 0 | 3 | 16 | 1 | 13 | 0 | +| 18 | 3 | 36.76 s | 0 | 1 | 6 | 10 | 1 | 4 | 0 | +| 19 | 3 | 29.10 s | 0 | 1 | 9 | 11 | 1 | 2 | 0 | +| 20 | 3 | 51.36 s | 0 | 3 | 12 | 14 | 1 | 2 | 0 | \ No newline at end of file diff --git a/.keda/results_test_k8s_autoscaling_deployment_count_in_chaos.md b/.keda/results_test_k8s_autoscaling_deployment_count_in_chaos.md new file mode 100644 index 0000000000..a64e453d83 --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_deployment_count_in_chaos.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | ------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 4 | 53.63 s | 0 | 4 | 4 | 4 | 1 | 0 | 4 | +| 2 | 5 | 40.02 s | 0 | 2 | 5 | 6 | 1 | 1 | 5 | +| 3 | 5 | 16.09 s | 0 | 0 | 5 | 6 | 1 | 1 | 
5 | +| 4 | 4 | 15.78 s | 0 | 0 | 4 | 6 | 1 | 2 | 4 | +| 5 | 5 | 50.10 s | 0 | 2 | 5 | 8 | 1 | 3 | 5 | +| 6 | 4 | 15.69 s | 0 | 0 | 4 | 8 | 1 | 4 | 4 | +| 7 | 4 | 15.70 s | 0 | 0 | 4 | 8 | 1 | 4 | 4 | +| 8 | 4 | 17.02 s | 0 | 0 | 4 | 7 | 1 | 3 | 4 | +| 9 | 4 | 16.09 s | 1 | -2 | 3 | 5 | 1 | 2 | 3 | +| 10 | 4 | 33.71 s | 0 | 2 | 4 | 7 | 1 | 3 | 4 | +| 11 | 4 | 15.71 s | 0 | 0 | 4 | 7 | 1 | 3 | 4 | +| 12 | 5 | 33.66 s | 0 | -1 | 5 | 6 | 1 | 1 | 5 | +| 13 | 3 | 6.64 s | 0 | 0 | 3 | 6 | 1 | 3 | 3 | +| 14 | 5 | 33.28 s | 0 | 1 | 5 | 7 | 1 | 2 | 5 | +| 15 | 4 | 15.86 s | 0 | 0 | 4 | 7 | 1 | 3 | 4 | +| 16 | 4 | 16.78 s | 0 | 0 | 4 | 7 | 1 | 3 | 4 | +| 17 | 3 | 7.01 s | 0 | 0 | 3 | 7 | 1 | 4 | 3 | +| 18 | 5 | 14.91 s | 2 | -2 | 3 | 5 | 1 | 2 | 3 | +| 19 | 5 | 48.14 s | 0 | 1 | 5 | 6 | 1 | 1 | 5 | +| 20 | 6 | 40.97 s | 0 | 2 | 6 | 8 | 1 | 2 | 3 | \ No newline at end of file diff --git a/.keda/results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md b/.keda/results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md new file mode 100644 index 0000000000..f7689b4ddd --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_deployment_count_with_node_max_sessions.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | ------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 2 | 35.37 s | 0 | 2 | 2 | 2 | 3 | 4 | 2 | +| 2 | 1 | 4.30 s | 0 | 0 | 1 | 2 | 3 | 5 | 0 | +| 3 | 3 | 49.48 s | 0 | 1 | 4 | 3 | 3 | 5 | 0 | +| 4 | 3 | 21.57 s | 0 | 0 | 7 | 3 | 3 | 2 | 0 | +| 5 | 2 | 40.77 s | 0 | 3 | 9 | 6 | 3 | 9 | 0 | +| 6 | 3 | 31.57 s | 0 | 3 | 12 | 9 | 3 | 15 | 12 | +| 7 | 1 | 5.18 s | 0 | 0 | 1 | 9 | 3 | 26 | 0 | +| 8 | 1 | 10.43 s | 0 | 0 | 2 
| 9 | 3 | 25 | 0 | +| 9 | 1 | 17.07 s | 0 | 0 | 3 | 9 | 3 | 24 | 0 | +| 10 | 3 | 11.19 s | 0 | 0 | 6 | 9 | 3 | 21 | 0 | +| 11 | 3 | 6.64 s | 0 | 0 | 9 | 9 | 3 | 18 | 9 | +| 12 | 1 | 4.36 s | 0 | 0 | 1 | 9 | 3 | 26 | 0 | +| 13 | 3 | 15.31 s | 0 | 0 | 4 | 9 | 3 | 23 | 0 | +| 14 | 2 | 19.51 s | 0 | 0 | 6 | 9 | 3 | 21 | 0 | +| 15 | 3 | 26.13 s | 0 | 0 | 9 | 6 | 3 | 9 | 0 | +| 16 | 3 | 40.84 s | 0 | 4 | 12 | 7 | 3 | 9 | 12 | +| 17 | 2 | 7.10 s | 0 | 0 | 2 | 7 | 3 | 19 | 0 | +| 18 | 1 | 15.15 s | 0 | 0 | 3 | 7 | 3 | 18 | 0 | +| 19 | 3 | 49.19 s | 0 | 0 | 6 | 7 | 3 | 15 | 0 | +| 20 | 1 | 41.04 s | 0 | 3 | 7 | 10 | 3 | 23 | 0 | \ No newline at end of file diff --git a/.keda/results_test_k8s_autoscaling_job_count_strategy_default.md b/.keda/results_test_k8s_autoscaling_job_count_strategy_default.md new file mode 100644 index 0000000000..83a257c26f --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_job_count_strategy_default.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | ------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 1 | 30.83 s | 0 | 1 | 1 | 1 | 1 | 0 | 1 | +| 2 | 3 | 54.69 s | 0 | 3 | 3 | 3 | 1 | 0 | 0 | +| 3 | 1 | 34.33 s | 0 | 1 | 4 | 4 | 1 | 0 | 0 | +| 4 | 3 | 56.51 s | 0 | 3 | 7 | 7 | 1 | 0 | 0 | +| 5 | 2 | 43.24 s | 0 | 2 | 9 | 9 | 1 | 0 | 0 | +| 6 | 2 | 37.68 s | 0 | 2 | 11 | 11 | 1 | 0 | 11 | +| 7 | 2 | 39.21 s | 0 | 2 | 2 | 2 | 1 | 0 | 0 | +| 8 | 2 | 36.87 s | 0 | 2 | 4 | 4 | 1 | 0 | 0 | +| 9 | 3 | 53.17 s | 0 | 3 | 7 | 7 | 1 | 0 | 0 | +| 10 | 1 | 36.48 s | 0 | 1 | 8 | 8 | 1 | 0 | 0 | +| 11 | 2 | 45.94 s | 0 | 2 | 10 | 10 | 1 | 0 | 10 | +| 12 | 1 | 36.96 s | 0 | 1 | 1 | 1 | 1 | 0 | 0 | +| 13 | 1 | 36.73 s | 0 | 1 | 2 | 2 | 1 
| 0 | 0 | +| 14 | 2 | 44.68 s | 0 | 2 | 4 | 4 | 1 | 0 | 0 | +| 15 | 1 | 53.12 s | 0 | 1 | 5 | 5 | 1 | 0 | 0 | +| 16 | 2 | 46.00 s | 0 | 2 | 7 | 7 | 1 | 0 | 7 | +| 17 | 3 | 42.35 s | 0 | 3 | 3 | 3 | 1 | 0 | 0 | +| 18 | 3 | 45.69 s | 0 | 3 | 6 | 6 | 1 | 0 | 0 | +| 19 | 2 | 42.02 s | 0 | 2 | 8 | 8 | 1 | 0 | 0 | +| 20 | 1 | 44.63 s | 0 | 1 | 9 | 9 | 1 | 0 | 0 | \ No newline at end of file diff --git a/.keda/results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md b/.keda/results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md new file mode 100644 index 0000000000..33bef17a55 --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_job_count_strategy_default_in_chaos.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | ------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 5 | 46.67 s | 0 | 5 | 5 | 5 | 1 | 0 | 5 | +| 2 | 6 | 56.52 s | 0 | 6 | 6 | 6 | 1 | 0 | 4 | +| 3 | 4 | 54.94 s | 0 | 4 | 6 | 6 | 1 | 0 | 4 | +| 4 | 4 | 57.87 s | 0 | 4 | 6 | 6 | 1 | 0 | 4 | +| 5 | 6 | 58.92 s | 0 | 6 | 8 | 8 | 1 | 0 | 8 | +| 6 | 5 | 44.86 s | 0 | 5 | 5 | 5 | 1 | 0 | 3 | +| 7 | 3 | 45.84 s | 0 | 3 | 5 | 5 | 1 | 0 | 4 | +| 8 | 3 | 51.35 s | 0 | 3 | 4 | 4 | 1 | 0 | 4 | +| 9 | 5 | 60.72 s | 0 | 5 | 5 | 5 | 1 | 0 | 5 | +| 10 | 6 | 55.06 s | 0 | 6 | 6 | 6 | 1 | 0 | 6 | +| 11 | 3 | 49.55 s | 0 | 3 | 3 | 3 | 1 | 0 | 3 | +| 12 | 6 | 32.39 s | 0 | 6 | 6 | 6 | 1 | 0 | 6 | +| 13 | 6 | 49.36 s | 0 | 6 | 6 | 6 | 1 | 0 | 3 | +| 14 | 3 | 49.50 s | 0 | 3 | 6 | 6 | 1 | 0 | 6 | +| 15 | 6 | 46.22 s | 0 | 6 | 6 | 6 | 1 | 0 | 6 | +| 16 | 6 | 40.57 s | 0 | 6 | 6 | 6 | 1 | 0 | 4 | +| 17 | 6 | 55.14 s | 0 | 6 | 8 | 8 | 1 | 0 | 8 | +| 18 | 3 | 47.84 s | 0 | 3 | 3 | 3 
| 1 | 0 | 3 | +| 19 | 5 | 43.48 s | 0 | 5 | 5 | 5 | 1 | 0 | 5 | +| 20 | 3 | 44.11 s | 0 | 3 | 3 | 3 | 1 | 0 | 3 | \ No newline at end of file diff --git a/.keda/results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md b/.keda/results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md new file mode 100644 index 0000000000..f60c0ec6d0 --- /dev/null +++ b/.keda/results_test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions.md @@ -0,0 +1,22 @@ +| Iteration | New request sessions | Sessions created time | Sessions failed to create | New pods scaled up | Total running sessions | Total running pods | Max sessions per pod | Gaps | Sessions closed | +| --------- | -------------------- | --------------------- | ------------------------- | ------------------ | ---------------------- | ------------------ | -------------------- | ---- | --------------- | +| 1 | 1 | 34.05 s | 0 | 1 | 1 | 1 | 3 | 2 | 1 | +| 2 | 3 | 73.48 s | 0 | 2 | 3 | 3 | 3 | 6 | 0 | +| 3 | 1 | 6.75 s | 0 | 0 | 4 | 3 | 3 | 5 | 0 | +| 4 | 3 | 16.18 s | 0 | 0 | 7 | 3 | 3 | 2 | 0 | +| 5 | 1 | 6.45 s | 0 | 0 | 8 | 3 | 3 | 1 | 0 | +| 6 | 3 | 54.20 s | 0 | 5 | 11 | 8 | 3 | 13 | 11 | +| 7 | 2 | 34.96 s | 0 | 1 | 2 | 6 | 3 | 16 | 0 | +| 8 | 2 | 7.98 s | 0 | 0 | 4 | 6 | 3 | 14 | 0 | +| 9 | 2 | 17.20 s | 0 | 0 | 6 | 6 | 3 | 12 | 0 | +| 10 | 1 | 15.56 s | 0 | 0 | 7 | 6 | 3 | 11 | 0 | +| 11 | 1 | 4.99 s | 0 | 0 | 8 | 6 | 3 | 10 | 8 | +| 12 | 1 | 16.48 s | 0 | 0 | 1 | 5 | 3 | 14 | 0 | +| 13 | 1 | 6.87 s | 0 | 0 | 2 | 5 | 3 | 13 | 0 | +| 14 | 3 | 15.72 s | 0 | 0 | 5 | 5 | 3 | 10 | 0 | +| 15 | 2 | 42.55 s | 0 | 2 | 7 | 7 | 3 | 14 | 0 | +| 16 | 2 | 43.25 s | 0 | 1 | 9 | 8 | 3 | 15 | 9 | +| 17 | 3 | 15.73 s | 0 | 0 | 3 | 4 | 3 | 9 | 0 | +| 18 | 3 | 43.50 s | 0 | 3 | 6 | 7 | 3 | 15 | 0 | +| 19 | 3 | 74.96 s | 0 | 1 | 9 | 8 | 3 | 15 | 0 | +| 20 | 1 | 7.99 s | 0 | 0 | 10 | 8 | 3 | 14 | 0 | \ No newline at end of file diff --git a/.keda/scalers/selenium_grid_scaler.go 
b/.keda/scalers/selenium_grid_scaler.go index 11a65a938a..bb18d7c976 100644 --- a/.keda/scalers/selenium_grid_scaler.go +++ b/.keda/scalers/selenium_grid_scaler.go @@ -39,8 +39,9 @@ type seleniumGridScalerMetadata struct { BrowserVersion string `keda:"name=browserVersion, order=triggerMetadata, default=latest"` UnsafeSsl bool `keda:"name=unsafeSsl, order=triggerMetadata, default=false"` PlatformName string `keda:"name=platformName, order=triggerMetadata, default=linux"` - NodeMaxSessions int `keda:"name=nodeMaxSessions, order=triggerMetadata, default=1"` - TargetQueueLength int64 `keda:"name=targetQueueLength, order=triggerMetadata;resolvedEnv, default=1"` + NodeMaxSessions int64 `keda:"name=nodeMaxSessions, order=triggerMetadata, default=1"` + + TargetValue int64 } type SeleniumResponse struct { @@ -54,9 +55,9 @@ type Data struct { } type Grid struct { - SessionCount int `json:"sessionCount"` - MaxSession int `json:"maxSession"` - TotalSlots int `json:"totalSlots"` + SessionCount int64 `json:"sessionCount"` + MaxSession int64 `json:"maxSession"` + TotalSlots int64 `json:"totalSlots"` } type NodesInfo struct { @@ -70,17 +71,17 @@ type SessionsInfo struct { type Nodes []struct { ID string `json:"id"` Status string `json:"status"` - SessionCount int `json:"sessionCount"` - MaxSession int `json:"maxSession"` - SlotCount int `json:"slotCount"` + SessionCount int64 `json:"sessionCount"` + MaxSession int64 `json:"maxSession"` + SlotCount int64 `json:"slotCount"` Stereotypes string `json:"stereotypes"` Sessions Sessions `json:"sessions"` } type ReservedNodes struct { ID string `json:"id"` - MaxSession int `json:"maxSession"` - SlotCount int `json:"slotCount"` + MaxSession int64 `json:"maxSession"` + SlotCount int64 `json:"slotCount"` } type Sessions []struct { @@ -101,13 +102,12 @@ type Capability struct { } type Stereotypes []struct { - Slots int `json:"slots"` + Slots int64 `json:"slots"` Stereotype Capability `json:"stereotype"` } const ( - DefaultBrowserVersion string 
= "latest" - DefaultTargetQueueLength int64 = 1 + DefaultBrowserVersion string = "latest" ) func NewSeleniumGridScaler(config *scalersconfig.ScalerConfig) (Scaler, error) { @@ -135,7 +135,9 @@ func NewSeleniumGridScaler(config *scalersconfig.ScalerConfig) (Scaler, error) { } func parseSeleniumGridScalerMetadata(config *scalersconfig.ScalerConfig) (*seleniumGridScalerMetadata, error) { - meta := &seleniumGridScalerMetadata{} + meta := &seleniumGridScalerMetadata{ + TargetValue: 1, + } if err := config.TypedConfig(meta); err != nil { return nil, fmt.Errorf("error parsing prometheus metadata: %w", err) @@ -147,9 +149,6 @@ func parseSeleniumGridScalerMetadata(config *scalersconfig.ScalerConfig) (*selen meta.SessionBrowserName = meta.BrowserName } - if meta.TargetQueueLength < 1 { - meta.TargetQueueLength = DefaultTargetQueueLength - } return meta, nil } @@ -162,15 +161,14 @@ func (s *seleniumGridScaler) Close(context.Context) error { } func (s *seleniumGridScaler) GetMetricsAndActivity(ctx context.Context, metricName string) ([]external_metrics.ExternalMetricValue, bool, error) { - queueLen, err := s.getSessionsQueueLength(ctx, s.logger) + newRequestNodes, onGoingSessions, err := s.getSessionsQueueLength(ctx, s.logger) if err != nil { return []external_metrics.ExternalMetricValue{}, false, fmt.Errorf("error requesting selenium grid endpoint: %w", err) } - metric := GenerateMetricInMili(metricName, float64(queueLen)) + metric := GenerateMetricInMili(metricName, float64(newRequestNodes+onGoingSessions)) - // If the number of sessions queued is equal to or greater than the targetQueueLength, the scaler will scale up. 
- return []external_metrics.ExternalMetricValue{metric}, queueLen >= s.metadata.TargetQueueLength, nil + return []external_metrics.ExternalMetricValue{metric}, (newRequestNodes + onGoingSessions) > s.metadata.ActivationThreshold, nil } func (s *seleniumGridScaler) GetMetricSpecForScaling(context.Context) []v2.MetricSpec { @@ -179,7 +177,7 @@ func (s *seleniumGridScaler) GetMetricSpecForScaling(context.Context) []v2.Metri Metric: v2.MetricIdentifier{ Name: GenerateMetricNameWithIndex(s.metadata.triggerIndex, metricName), }, - Target: GetMetricTarget(s.metricType, s.metadata.TargetQueueLength), + Target: GetMetricTarget(s.metricType, s.metadata.TargetValue), } metricSpec := v2.MetricSpec{ External: externalMetric, Type: externalMetricType, @@ -187,18 +185,18 @@ func (s *seleniumGridScaler) GetMetricSpecForScaling(context.Context) []v2.Metri return []v2.MetricSpec{metricSpec} } -func (s *seleniumGridScaler) getSessionsQueueLength(ctx context.Context, logger logr.Logger) (int64, error) { +func (s *seleniumGridScaler) getSessionsQueueLength(ctx context.Context, logger logr.Logger) (int64, int64, error) { body, err := json.Marshal(map[string]string{ "query": "{ grid { sessionCount, maxSession, totalSlots }, nodesInfo { nodes { id, status, sessionCount, maxSession, slotCount, stereotypes, sessions { id, capabilities, slot { id, stereotype } } } }, sessionsInfo { sessionQueueRequests } }", }) if err != nil { - return -1, err + return -1, -1, err } req, err := http.NewRequestWithContext(ctx, "POST", s.metadata.URL, bytes.NewBuffer(body)) if err != nil { - return -1, err + return -1, -1, err } if (s.metadata.AuthType == "" || strings.EqualFold(s.metadata.AuthType, "Basic")) && s.metadata.Username != "" && s.metadata.Password != "" { @@ -209,28 +207,28 @@ func (s *seleniumGridScaler) getSessionsQueueLength(ctx context.Context, logger res, err := s.httpClient.Do(req) if err != nil { - return -1, err + return -1, -1, err } if res.StatusCode != http.StatusOK { msg := 
fmt.Sprintf("selenium grid returned %d", res.StatusCode) - return -1, errors.New(msg) + return -1, -1, errors.New(msg) } defer res.Body.Close() b, err := io.ReadAll(res.Body) if err != nil { - return -1, err + return -1, -1, err } - v, err := getCountFromSeleniumResponse(b, s.metadata.BrowserName, s.metadata.BrowserVersion, s.metadata.SessionBrowserName, s.metadata.PlatformName, s.metadata.NodeMaxSessions, logger) + newRequestNodes, onGoingSession, err := getCountFromSeleniumResponse(b, s.metadata.BrowserName, s.metadata.BrowserVersion, s.metadata.SessionBrowserName, s.metadata.PlatformName, s.metadata.NodeMaxSessions, logger) if err != nil { - return -1, err + return -1, -1, err } - return v, nil + return newRequestNodes, onGoingSession, nil } -func countMatchingSlotsStereotypes(stereotypes Stereotypes, request Capability, browserName string, browserVersion string, sessionBrowserName string, platformName string) int { - var matchingSlots int +func countMatchingSlotsStereotypes(stereotypes Stereotypes, request Capability, browserName string, browserVersion string, sessionBrowserName string, platformName string) int64 { + var matchingSlots int64 for _, stereotype := range stereotypes { if checkCapabilitiesMatch(stereotype.Stereotype, request, browserName, browserVersion, sessionBrowserName, platformName) { matchingSlots += stereotype.Slots @@ -239,8 +237,8 @@ func countMatchingSlotsStereotypes(stereotypes Stereotypes, request Capability, return matchingSlots } -func countMatchingSessions(sessions Sessions, request Capability, browserName string, browserVersion string, sessionBrowserName string, platformName string, logger logr.Logger) int { - var matchingSessions int +func countMatchingSessions(sessions Sessions, request Capability, browserName string, browserVersion string, sessionBrowserName string, platformName string, logger logr.Logger) int64 { + var matchingSessions int64 for _, session := range sessions { var capability = Capability{} if err := 
json.Unmarshal([]byte(session.Capabilities), &capability); err == nil { @@ -277,7 +275,7 @@ func checkCapabilitiesMatch(capability Capability, requestCapability Capability, return browserNameMatches && browserVersionMatches && platformNameMatches } -func checkNodeReservedSlots(reservedNodes []ReservedNodes, nodeID string, availableSlots int) int { +func checkNodeReservedSlots(reservedNodes []ReservedNodes, nodeID string, availableSlots int64) int64 { for _, reservedNode := range reservedNodes { if strings.EqualFold(reservedNode.ID, nodeID) { return reservedNode.SlotCount @@ -286,7 +284,7 @@ func checkNodeReservedSlots(reservedNodes []ReservedNodes, nodeID string, availa return availableSlots } -func updateOrAddReservedNode(reservedNodes []ReservedNodes, nodeID string, slotCount int, maxSession int) []ReservedNodes { +func updateOrAddReservedNode(reservedNodes []ReservedNodes, nodeID string, slotCount int64, maxSession int64) []ReservedNodes { for i, reservedNode := range reservedNodes { if strings.EqualFold(reservedNode.ID, nodeID) { // Update remaining available slots for the reserved node @@ -298,17 +296,15 @@ func updateOrAddReservedNode(reservedNodes []ReservedNodes, nodeID string, slotC return append(reservedNodes, ReservedNodes{ID: nodeID, SlotCount: slotCount, MaxSession: maxSession}) } -func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion string, sessionBrowserName string, platformName string, nodeMaxSessions int, logger logr.Logger) (int64, error) { - // The returned count of the number of new Nodes will be scaled up - var count int64 +func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion string, sessionBrowserName string, platformName string, nodeMaxSessions int64, logger logr.Logger) (int64, int64, error) { // Track number of available slots of existing Nodes in the Grid can be reserved for the matched requests - var availableSlots int + var availableSlots int64 // Track number of matched requests in the 
sessions queue will be served by this scaler - var queueSlots int + var queueSlots int64 var seleniumResponse = SeleniumResponse{} if err := json.Unmarshal(b, &seleniumResponse); err != nil { - return 0, err + return 0, 0, err } var sessionQueueRequests = seleniumResponse.Data.SessionsInfo.SessionQueueRequests @@ -317,6 +313,7 @@ func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion s var reservedNodes []ReservedNodes // Track list of new Nodes will be scaled up with number of available slots following scaler parameter `nodeMaxSessions` var newRequestNodes []ReservedNodes + var onGoingSessions int64 for requestIndex, sessionQueueRequest := range sessionQueueRequests { var isRequestMatched bool var requestCapability = Capability{} @@ -335,20 +332,22 @@ func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion s } var isRequestReserved bool + var sumOfCurrentSessionsMatch int64 // Check if the matched request can be assigned to available slots of existing Nodes in the Grid for _, node := range nodes { + // Count ongoing sessions that match the request capability and scaler metadata + var currentSessionsMatch = countMatchingSessions(node.Sessions, requestCapability, browserName, browserVersion, sessionBrowserName, platformName, logger) + sumOfCurrentSessionsMatch += currentSessionsMatch // Check if node is UP and has available slots (maxSession > sessionCount) if strings.EqualFold(node.Status, "UP") && checkNodeReservedSlots(reservedNodes, node.ID, node.MaxSession-node.SessionCount) > 0 { var stereotypes = Stereotypes{} - var availableSlotsMatch int + var availableSlotsMatch int64 if err := json.Unmarshal([]byte(node.Stereotypes), &stereotypes); err == nil { // Count available slots that match the request capability and scaler metadata availableSlotsMatch += countMatchingSlotsStereotypes(stereotypes, requestCapability, browserName, browserVersion, sessionBrowserName, platformName) } else { logger.Error(err, 
fmt.Sprintf("Error when unmarshaling node stereotypes: %s", err)) } - // Count ongoing sessions that match the request capability and scaler metadata - var currentSessionsMatch = countMatchingSessions(node.Sessions, requestCapability, browserName, browserVersion, sessionBrowserName, platformName, logger) // Count remaining available slots can be reserved for this request var availableSlotsCanBeReserved = checkNodeReservedSlots(reservedNodes, node.ID, node.MaxSession-node.SessionCount) // Reserve one available slot for the request if available slots match is greater than current sessions match @@ -360,6 +359,9 @@ func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion s } } } + if sumOfCurrentSessionsMatch > onGoingSessions { + onGoingSessions = sumOfCurrentSessionsMatch + } // Check if the matched request can be assigned to available slots of new Nodes will be scaled up, since the scaler parameter `nodeMaxSessions` can be greater than 1 if !isRequestReserved { for _, newRequestNode := range newRequestNodes { @@ -376,11 +378,5 @@ func getCountFromSeleniumResponse(b []byte, browserName string, browserVersion s } } - if queueSlots > availableSlots { - count = int64(len(newRequestNodes)) - } else { - count = 0 - } - - return count, nil + return int64(len(newRequestNodes)), onGoingSessions, nil } diff --git a/.keda/scalers/selenium_grid_scaler_test.go b/.keda/scalers/selenium_grid_scaler_test.go index 075691e152..6613be242c 100644 --- a/.keda/scalers/selenium_grid_scaler_test.go +++ b/.keda/scalers/selenium_grid_scaler_test.go @@ -16,13 +16,14 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { sessionBrowserName string browserVersion string platformName string - nodeMaxSessions int + nodeMaxSessions int64 } tests := []struct { - name string - args args - want int64 - wantErr bool + name string + args args + wantNewRequestNodes int64 + wantOnGoingSessions int64 + wantErr bool }{ { name: "nil response body should throw error", @@ -61,8 +62,8 
@@ func Test_getCountFromSeleniumResponse(t *testing.T) { `), browserName: "", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "12 sessionQueueRequests with 4 requests matching browserName chrome should return count as 4", @@ -101,8 +102,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 4, - wantErr: false, + wantNewRequestNodes: 4, + wantErr: false, }, { name: "2 sessionQueueRequests and 1 available nodeStereotypes with matching browserName firefox should return count as 1", @@ -276,8 +277,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 4, + wantErr: false, }, { name: "1 sessionQueueRequests and 1 available nodeStereotypes with matching browserName chrome should return count as 0", @@ -325,8 +327,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "1 sessionQueueRequests Linux and 1 available nodeStereotypes Windows with matching browserName chrome should return count as 1", @@ -374,8 +376,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantErr: false, }, { name: "scaler browserVersion is latest, 2 sessionQueueRequests wihtout browserVersion, 2 available nodeStereotypes with different versions and platforms, should return count as 1", @@ -422,8 +424,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantErr: false, }, { name: "scaler browserVersion is latest, 5 sessionQueueRequests wihtout browserVersion also 1 different platformName, 1 
available nodeStereotypes with 3 slots Linux and 1 node Windows, should return count as 1", @@ -473,8 +475,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantErr: false, }, { name: "queue request with browserName browserVersion and browserVersion but no available nodes should return count as 1", @@ -516,8 +518,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantErr: false, }, { name: "1 queue request with browserName browserVersion and browserVersion but 2 nodes without available slots should return count as 1", @@ -573,8 +575,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "2 session queue with matching browsername and browserversion of 2 available slots should return count as 0", @@ -621,8 +624,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "2 queue requests with browserName browserVersion and platformName matching 2 available slots on 2 different nodes should return count as 0", @@ -679,8 +682,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "1 queue request with browserName browserVersion and platformName matching 1 available slot on node has 3 max sessions should return count as 0", @@ -726,8 +730,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 0, - wantErr: false, + 
wantNewRequestNodes: 0, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "3 queue requests with browserName browserVersion and platformName but 2 running nodes are busy should return count as 3", @@ -785,8 +790,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 3, - wantErr: false, + wantNewRequestNodes: 3, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "3 queue requests with browserName browserVersion and platformName but 2 running nodes are busy with different versions should return count as 3", @@ -844,8 +850,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 3, - wantErr: false, + wantNewRequestNodes: 3, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "3 queue requests with browserName and platformName but 2 running nodes are busy with different versions should return count as 3", @@ -903,8 +910,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 3, - wantErr: false, + wantNewRequestNodes: 3, + wantOnGoingSessions: 2, + wantErr: false, }, { name: "1 active session with matching browsername and version should return count as 2", @@ -947,8 +955,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "1 request without browserName and browserVersion stable can be match any available node should return count as 0", @@ -985,8 +994,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "1 request without browserName and browserVersion stable should return count as 1", @@ -1028,8 +1037,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { 
browserVersion: "latest", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "2 queue requests with browserName in string match node stereotype and scaler metadata browserVersion should return count as 1", @@ -1072,8 +1082,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "dev", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "2 queue requests with matching browsername/sessionBrowserName but 1 node is busy should return count as 2", @@ -1116,8 +1127,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "2 queue requests with matching browsername/sessionBrowserName and 1 node is is available should return count as 1", @@ -1155,8 +1167,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantErr: false, }, { name: "2 queue requests with platformName and without platformName and node with 1 slot available should return count as 1", args: args{ @@ -1198,8 +1210,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "Windows 11", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "1 active msedge session while asking for 2 chrome sessions should return a count of 2", @@ -1242,8 +1255,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantErr: false, }, { name: "3 queue requests browserName chrome platformName linux but 1 node has maxSessions=3 with browserName msedge should return 
a count of 3", @@ -1287,8 +1300,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 3, - wantErr: false, + wantNewRequestNodes: 3, + wantErr: false, }, { name: "session request with matching browsername and no specific platformName should return count as 2", @@ -1316,8 +1329,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "", }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantErr: false, }, { name: "2 queue requests with 1 matching browsername and platformName and 1 existing slot is available should return count as 0", @@ -1355,8 +1368,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "Windows 11", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "2 queue requests with 1 request matching browserName and platformName but 1 existing node is busy should return count as 1", @@ -1403,8 +1416,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0", platformName: "Windows 11", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 1, + wantErr: false, }, { name: "5 queue requests with scaler parameter nodeMaxSessions is 2 should return count as 3", @@ -1437,8 +1451,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { platformName: "linux", nodeMaxSessions: 2, }, - want: 3, - wantErr: false, + wantNewRequestNodes: 3, + wantErr: false, }, { name: "5 queue requests with scaler parameter nodeMaxSessions is 3 should return count as 2", @@ -1471,8 +1485,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { platformName: "linux", nodeMaxSessions: 3, }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantErr: false, }, { name: "5 queue requests with request matching browserName and platformName and scaler param nodeMaxSessions is 3 and existing node with 1 available slot should return 
count as 2", @@ -1523,8 +1537,9 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { platformName: "linux", nodeMaxSessions: 3, }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantOnGoingSessions: 2, + wantErr: false, }, // Tests from PR: https://github.com/kedacore/keda/pull/6055 { @@ -1563,8 +1578,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 0, - wantErr: false, + wantNewRequestNodes: 0, + wantErr: false, }, { name: "4 sessions requests with matching browsername and platformName when setSessionsFromHub turned on and node with 2 slots matches should return count as 2", @@ -1605,8 +1620,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "latest", platformName: "linux", }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantErr: false, }, { name: "4 sessions requests with matching browsername and platformName when setSessionsFromHub turned on, no nodes and sessionsPerNode=2 matches should return count as 2", @@ -1637,8 +1652,8 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { platformName: "linux", nodeMaxSessions: 2, }, - want: 2, - wantErr: false, + wantNewRequestNodes: 2, + wantErr: false, }, { name: "sessions requests and active sessions with 1 matching browsername, platformName and sessionBrowserVersion should return count as 1", @@ -1687,19 +1702,20 @@ func Test_getCountFromSeleniumResponse(t *testing.T) { browserVersion: "91.0.4472.114", platformName: "linux", }, - want: 1, - wantErr: false, + wantNewRequestNodes: 1, + wantOnGoingSessions: 2, + wantErr: false, }, } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - got, err := getCountFromSeleniumResponse(tt.args.b, tt.args.browserName, tt.args.browserVersion, tt.args.sessionBrowserName, tt.args.platformName, tt.args.nodeMaxSessions, logr.Discard()) + newRequestNodes, onGoingSessions, err := getCountFromSeleniumResponse(tt.args.b, tt.args.browserName, 
tt.args.browserVersion, tt.args.sessionBrowserName, tt.args.platformName, tt.args.nodeMaxSessions, logr.Discard()) if (err != nil) != tt.wantErr { t.Errorf("getCountFromSeleniumResponse() error = %v, wantErr %v", err, tt.wantErr) return } - if !reflect.DeepEqual(got, tt.want) { - t.Errorf("getCountFromSeleniumResponse() = %v, want %v", got, tt.want) + if !reflect.DeepEqual(newRequestNodes, tt.wantNewRequestNodes) || !reflect.DeepEqual(onGoingSessions, tt.wantOnGoingSessions) { + t.Errorf("getCountFromSeleniumResponse() = [%v, %v], want [%v, %v]", newRequestNodes, onGoingSessions, tt.wantNewRequestNodes, tt.wantOnGoingSessions) } }) } @@ -1750,7 +1766,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, BrowserVersion: "latest", PlatformName: "linux", NodeMaxSessions: 1, @@ -1772,7 +1788,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "MicrosoftEdge", SessionBrowserName: "msedge", - TargetQueueLength: 1, + TargetValue: 1, BrowserVersion: "latest", PlatformName: "linux", NodeMaxSessions: 1, @@ -1800,7 +1816,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { Password: "password", BrowserName: "MicrosoftEdge", SessionBrowserName: "msedge", - TargetQueueLength: 1, + TargetValue: 1, BrowserVersion: "latest", PlatformName: "linux", NodeMaxSessions: 1, @@ -1826,7 +1842,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "MicrosoftEdge", SessionBrowserName: "msedge", - TargetQueueLength: 1, + TargetValue: 1, BrowserVersion: "latest", PlatformName: "linux", Username: "username", @@ -1851,7 +1867,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 
1, BrowserVersion: "91.0", UnsafeSsl: false, PlatformName: "linux", @@ -1876,7 +1892,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -1917,7 +1933,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -1944,7 +1960,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { URL: "http://selenium-hub:4444/graphql", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -1978,7 +1994,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { Password: "password", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -2013,7 +2029,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { Password: "password", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -2048,7 +2064,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { AccessToken: "my-access-token", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, @@ -2082,7 +2098,7 @@ func Test_parseSeleniumGridScalerMetadata(t *testing.T) { AccessToken: "my-access-token", BrowserName: "chrome", SessionBrowserName: "chrome", - TargetQueueLength: 1, + TargetValue: 1, ActivationThreshold: 10, BrowserVersion: "91.0", UnsafeSsl: true, diff --git 
a/Makefile b/Makefile index 01443f95fd..931401c43d 100644 --- a/Makefile +++ b/Makefile @@ -29,7 +29,7 @@ SBOM_OUTPUT := $(or $(SBOM_OUTPUT),$(SBOM_OUTPUT),package_versions.txt) KEDA_TAG_PREV_VERSION := $(or $(KEDA_TAG_PREV_VERSION),$(KEDA_TAG_PREV_VERSION),2.16.0-selenium-grid) KEDA_TAG_VERSION := $(or $(KEDA_TAG_VERSION),$(KEDA_TAG_VERSION),2.16.0-selenium-grid) KEDA_BASED_NAME := $(or $(KEDA_BASED_NAME),$(KEDA_BASED_NAME),ndviet) -KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241127) +KEDA_BASED_TAG := $(or $(KEDA_BASED_TAG),$(KEDA_BASED_TAG),2.16.0-selenium-grid-20241201) all: hub \ distributor \ @@ -961,6 +961,36 @@ chart_test_autoscaling_playwright_connect_grid: TEMPLATE_OUTPUT_FILENAME="k8s_playwright_connect_grid_basicAuth_secureIngress_ingressPublicIP_autoScaling_patchKEDA.yaml" \ ./tests/charts/make/chart_test.sh JobAutoscaling +test_k8s_autoscaling_job_count_strategy_default_in_chaos: + MATRIX_TESTS=AutoScalingTestsScaleChaos \ + make test_k8s_autoscaling_job_count_strategy_default + +test_k8s_autoscaling_job_count_strategy_default_with_node_max_sessions: + TEST_NODE_MAX_SESSIONS=3 \ + make test_k8s_autoscaling_job_count_strategy_default + +test_k8s_autoscaling_job_count_strategy_default: + MATRIX_TESTS=$(or $(MATRIX_TESTS), "AutoscalingTestsScaleUp") SCALING_STRATEGY=$(or $(SCALING_STRATEGY), "default") \ + PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \ + SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \ + VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \ + ./tests/charts/make/chart_test.sh JobAutoscaling + +test_k8s_autoscaling_deployment_count_in_chaos: + 
MATRIX_TESTS=AutoScalingTestsScaleChaos \ + make test_k8s_autoscaling_deployment_count + +test_k8s_autoscaling_deployment_count_with_node_max_sessions: + TEST_NODE_MAX_SESSIONS=3 \ + make test_k8s_autoscaling_deployment_count + +test_k8s_autoscaling_deployment_count: + MATRIX_TESTS=$(or $(MATRIX_TESTS), "AutoscalingTestsScaleUp") \ + PLATFORMS=$(PLATFORMS) RELEASE_NAME=selenium TEST_PATCHED_KEDA=true SELENIUM_GRID_PROTOCOL=http SELENIUM_GRID_HOST=localhost SELENIUM_GRID_PORT=80 \ + SELENIUM_GRID_MONITORING=false CLEAR_POD_HISTORY=true SET_MAX_REPLICAS=100 ENABLE_VIDEO_RECORDER=false \ + VERSION=$(TAG_VERSION) VIDEO_TAG=$(FFMPEG_TAG_VERSION)-$(BUILD_DATE) KEDA_BASED_NAME=$(KEDA_BASED_NAME) KEDA_BASED_TAG=$(KEDA_BASED_TAG) NAMESPACE=$(NAMESPACE) BINDING_VERSION=$(BINDING_VERSION) BASE_VERSION=$(BASE_VERSION) \ + ./tests/charts/make/chart_test.sh DeploymentAutoscaling + chart_test_delete: helm del test -n selenium || true helm del selenium -n selenium || true diff --git a/charts/selenium-grid/CONFIGURATION.md b/charts/selenium-grid/CONFIGURATION.md index 1f346e4685..3f084018a0 100644 --- a/charts/selenium-grid/CONFIGURATION.md +++ b/charts/selenium-grid/CONFIGURATION.md @@ -337,11 +337,11 @@ A Helm chart for creating a Selenium Grid Server in Kubernetes | autoscaling.patchObjectFinalizers.serviceAccount | string | `""` | Define an external service account name contains permissions to patch KEDA scaled resources | | autoscaling.patchObjectFinalizers.imagePullSecret | string | `""` | Custom pull secret for container in patch job | | autoscaling.patchObjectFinalizers.resources | object | `{"limits":{"cpu":"200m","memory":"500Mi"},"requests":{"cpu":"100m","memory":"200Mi"}}` | Define resources for container in patch job | -| autoscaling.scaledOptions | object | `{"maxReplicaCount":8,"minReplicaCount":0,"pollingInterval":10}` | Options for KEDA scaled resources (keep only common options used for both ScaledJob and ScaledObject) | +| autoscaling.scaledOptions | object | 
`{"maxReplicaCount":24,"minReplicaCount":0,"pollingInterval":20}` | Options for KEDA scaled resources (keep only common options used for both ScaledJob and ScaledObject) | | autoscaling.scaledOptions.minReplicaCount | int | `0` | Minimum number of replicas | -| autoscaling.scaledOptions.maxReplicaCount | int | `8` | Maximum number of replicas | -| autoscaling.scaledOptions.pollingInterval | int | `10` | Polling interval in seconds | -| autoscaling.scaledJobOptions.scalingStrategy.strategy | string | `"eager"` | Scaling strategy for KEDA ScaledJob - https://keda.sh/docs/latest/reference/scaledjob-spec/#scalingstrategy | +| autoscaling.scaledOptions.maxReplicaCount | int | `24` | Maximum number of replicas | +| autoscaling.scaledOptions.pollingInterval | int | `20` | Polling interval in seconds | +| autoscaling.scaledJobOptions.scalingStrategy.strategy | string | `"default"` | Scaling strategy for KEDA ScaledJob - https://keda.sh/docs/latest/reference/scaledjob-spec/#scalingstrategy | | autoscaling.scaledJobOptions.successfulJobsHistoryLimit | int | `0` | Number of Completed jobs should be kept | | autoscaling.scaledJobOptions.failedJobsHistoryLimit | int | `0` | Number of Failed jobs should be kept (for troubleshooting purposes) | | autoscaling.scaledJobOptions.jobTargetRef | object | `{"backoffLimit":0,"completions":1,"parallelism":1}` | Specify job target ref for KEDA ScaledJob | diff --git a/charts/selenium-grid/values.yaml b/charts/selenium-grid/values.yaml index df6e214cca..a48ad54f84 100644 --- a/charts/selenium-grid/values.yaml +++ b/charts/selenium-grid/values.yaml @@ -877,17 +877,16 @@ autoscaling: # -- Minimum number of replicas minReplicaCount: 0 # -- Maximum number of replicas - maxReplicaCount: 8 + maxReplicaCount: 24 # -- Polling interval in seconds - pollingInterval: 10 + pollingInterval: 20 # List of triggers. 
Be careful, the default trigger of selenium-grid will be overwritten if you specify this # triggers: # Options for KEDA ScaledJobs (only used when scalingType is set to "job"). See https://keda.sh/docs/latest/concepts/scaling-jobs/#scaledjob-spec scaledJobOptions: scalingStrategy: - # Use `eager` strategy for utilizing all available slots up to the maxReplicaCount, ensuring that waiting request are processed as quickly as possible. # -- Scaling strategy for KEDA ScaledJob - https://keda.sh/docs/latest/reference/scaledjob-spec/#scalingstrategy - strategy: eager + strategy: default # -- Number of Completed jobs should be kept successfulJobsHistoryLimit: 0 # -- Number of Failed jobs should be kept (for troubleshooting purposes) diff --git a/tests/AutoscalingTests/__init__.py b/tests/AutoscalingTests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/AutoscalingTests/common.py b/tests/AutoscalingTests/common.py new file mode 100644 index 0000000000..a6d7426b7e --- /dev/null +++ b/tests/AutoscalingTests/common.py @@ -0,0 +1,95 @@ +import unittest +import random +import time +import subprocess +import signal +import concurrent.futures +import csv +import os +from selenium import webdriver +from selenium.webdriver.firefox.options import Options as FirefoxOptions +from selenium.webdriver.edge.options import Options as EdgeOptions +from selenium.webdriver.chrome.options import Options as ChromeOptions +from selenium.webdriver.remote.client_config import ClientConfig +from csv2md.table import Table + +BROWSER = { + "chrome": ChromeOptions(), + "firefox": FirefoxOptions(), + "edge": EdgeOptions(), +} + +REMOTE_SERVER_ADDR = os.getenv("REMOTE_SERVER_ADDR", "http://localhost/selenium/wd/hub") + +CLIENT_CONFIG = ClientConfig( + remote_server_addr=REMOTE_SERVER_ADDR, + keep_alive=True, + timeout=3600, +) + +FIELD_NAMES = ["Iteration", "New request sessions", "Sessions created time", "Sessions failed to create", "New pods scaled up", "Total running 
sessions", "Total running pods", "Max sessions per pod", "Gaps", "Sessions closed"] + +def get_pod_count(): + result = subprocess.run(["kubectl", "get", "pods", "-A", "--no-headers"], capture_output=True, text=True) + return len([line for line in result.stdout.splitlines() if "selenium-node-" in line and "Running" in line]) + +def create_session(browser_name): + return webdriver.Remote(command_executor=CLIENT_CONFIG.remote_server_addr, options=BROWSER[browser_name], client_config=CLIENT_CONFIG) + +def wait_for_count_matches(sessions, timeout=10, interval=5): + elapsed = 0 + while elapsed < timeout: + pod_count = get_pod_count() + if pod_count == len(sessions): + break + print(f"VALIDATING: Waiting for pods to match sessions... ({elapsed}/{timeout} seconds elapsed)") + time.sleep(interval) + elapsed += interval + if pod_count != len(sessions): + print(f"WARN: Mismatch between pod count and session count after {timeout} seconds. Gaps: {pod_count - len(sessions)}") + else: + print(f"PASS: Pod count matches session count after {elapsed} seconds.") + +def close_all_sessions(sessions): + for session in sessions: + session.quit() + sessions.clear() + return sessions + +def create_sessions_in_parallel(new_request_sessions): + failed_jobs = 0 + with concurrent.futures.ThreadPoolExecutor() as executor: + futures = [executor.submit(create_session, random.choice(list(BROWSER.keys()))) for _ in range(new_request_sessions)] + sessions = [] + for future in concurrent.futures.as_completed(futures): + try: + sessions.append(future.result()) + except Exception as e: + print(f"ERROR: Failed to create session: {e}") + failed_jobs += 1 + print(f"Total failed jobs: {failed_jobs}") + return sessions + +def randomly_quit_sessions(sessions, sublist_size): + if sessions: + sessions_to_quit = random.sample(sessions, min(sublist_size, len(sessions))) + for session in sessions_to_quit: + session.quit() + sessions.remove(session) + print(f"QUIT: {len(sessions_to_quit)} sessions have been 
randomly quit.") + return len(sessions_to_quit) + +def get_result_file_name(): + return f"tests/autoscaling_results" + +def export_results_to_csv(output_file, field_names, results): + with open(output_file, mode="w") as csvfile: + writer = csv.DictWriter(csvfile, fieldnames=field_names) + writer.writeheader() + writer.writerows(results) + +def export_results_csv_to_md(csv_file, md_file): + with open(csv_file) as f: + table = Table.parse_csv(f) + with open(md_file, mode="w") as f: + f.write(table.markdown()) diff --git a/tests/AutoscalingTests/test_scale_chaos.py b/tests/AutoscalingTests/test_scale_chaos.py new file mode 100644 index 0000000000..c28d488839 --- /dev/null +++ b/tests/AutoscalingTests/test_scale_chaos.py @@ -0,0 +1,62 @@ +import unittest +import random +import time +import signal +import csv +from csv2md.table import Table +from .common import * + +SESSIONS = [] +RESULTS = [] +TEST_NODE_MAX_SESSIONS = int(os.getenv("TEST_NODE_MAX_SESSIONS", 1)) +TEST_AUTOSCALING_ITERATIONS = int(os.getenv("TEST_AUTOSCALING_ITERATIONS", 20)) + +def signal_handler(signum, frame): + print("Signal received, quitting all sessions...") + close_all_sessions(SESSIONS) + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +class SeleniumAutoscalingTests(unittest.TestCase): + def test_run_tests(self): + try: + for iteration in range(TEST_AUTOSCALING_ITERATIONS): + new_request_sessions = random.randint(3, 6) + start_time = time.time() + start_pods = get_pod_count() + new_sessions = create_sessions_in_parallel(new_request_sessions) + failed_sessions = new_request_sessions - len(new_sessions) + end_time = time.time() + stop_pods = get_pod_count() + SESSIONS.extend(new_sessions) + elapsed_time = end_time - start_time + new_scaled_pods = stop_pods - start_pods + total_sessions = len(SESSIONS) + total_pods = get_pod_count() + print(f"ADDING: Created {new_request_sessions} new sessions in {elapsed_time:.2f} seconds.") + print(f"INFO: Total 
sessions: {total_sessions}") + print(f"INFO: Total pods: {total_pods}") + closed_session = randomly_quit_sessions(SESSIONS, random.randint(3, 12)) + RESULTS.append({ + FIELD_NAMES[0]: iteration + 1, + FIELD_NAMES[1]: new_request_sessions, + FIELD_NAMES[2]: f"{elapsed_time:.2f} s", + FIELD_NAMES[3]: failed_sessions, + FIELD_NAMES[4]: new_scaled_pods, + FIELD_NAMES[5]: total_sessions, + FIELD_NAMES[6]: total_pods, + FIELD_NAMES[7]: TEST_NODE_MAX_SESSIONS, + FIELD_NAMES[8]: (total_pods * TEST_NODE_MAX_SESSIONS) - total_sessions, + FIELD_NAMES[9]: closed_session, + }) + time.sleep(15) + finally: + print(f"FINISH: Closing {len(SESSIONS)} sessions.") + close_all_sessions(SESSIONS) + output_file = get_result_file_name() + export_results_to_csv(f"{output_file}.csv", FIELD_NAMES, RESULTS) + export_results_csv_to_md(f"{output_file}.csv", f"{output_file}.md") + +if __name__ == "__main__": + unittest.main() diff --git a/tests/AutoscalingTests/test_scale_up.py b/tests/AutoscalingTests/test_scale_up.py new file mode 100644 index 0000000000..ddad4a344d --- /dev/null +++ b/tests/AutoscalingTests/test_scale_up.py @@ -0,0 +1,65 @@ +import unittest +import random +import time +import signal +import csv +from csv2md.table import Table +from .common import * + +SESSIONS = [] +RESULTS = [] +TEST_NODE_MAX_SESSIONS = int(os.getenv("TEST_NODE_MAX_SESSIONS", 1)) +TEST_AUTOSCALING_ITERATIONS = int(os.getenv("TEST_AUTOSCALING_ITERATIONS", 20)) + +def signal_handler(signum, frame): + print("Signal received, quitting all sessions...") + close_all_sessions(SESSIONS) + +signal.signal(signal.SIGTERM, signal_handler) +signal.signal(signal.SIGINT, signal_handler) + +class SeleniumAutoscalingTests(unittest.TestCase): + def test_run_tests(self): + try: + for iteration in range(TEST_AUTOSCALING_ITERATIONS): + new_request_sessions = random.randint(1, 3) + start_time = time.time() + start_pods = get_pod_count() + new_sessions = create_sessions_in_parallel(new_request_sessions) + failed_sessions = 
new_request_sessions - len(new_sessions)
+            end_time = time.time()
+            stop_pods = get_pod_count()
+            SESSIONS.extend(new_sessions)
+            elapsed_time = end_time - start_time
+            new_scaled_pods = stop_pods - start_pods
+            total_sessions = len(SESSIONS)
+            total_pods = get_pod_count()
+            print(f"ADDING: Created {new_request_sessions} new sessions in {elapsed_time:.2f} seconds.")
+            print(f"INFO: Total sessions: {total_sessions}")
+            print(f"INFO: Total pods: {total_pods}")
+            if iteration % 5 == 0:
+                closed_session = randomly_quit_sessions(SESSIONS, 20)
+            else:
+                closed_session = 0
+            RESULTS.append({
+                FIELD_NAMES[0]: iteration + 1,
+                FIELD_NAMES[1]: new_request_sessions,
+                FIELD_NAMES[2]: f"{elapsed_time:.2f} s",
+                FIELD_NAMES[3]: failed_sessions,
+                FIELD_NAMES[4]: new_scaled_pods,
+                FIELD_NAMES[5]: total_sessions,
+                FIELD_NAMES[6]: total_pods,
+                FIELD_NAMES[7]: TEST_NODE_MAX_SESSIONS,
+                FIELD_NAMES[8]: (total_pods * TEST_NODE_MAX_SESSIONS) - total_sessions,
+                FIELD_NAMES[9]: closed_session,
+            })
+            time.sleep(15)
+        finally:
+            print(f"FINISH: Closing {len(SESSIONS)} sessions.")
+            close_all_sessions(SESSIONS)
+            output_file = get_result_file_name()
+            export_results_to_csv(f"{output_file}.csv", FIELD_NAMES, RESULTS)
+            export_results_csv_to_md(f"{output_file}.csv", f"{output_file}.md")
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/tests/README.md b/tests/README.md
new file mode 100644
index 0000000000..518ea1680d
--- /dev/null
+++ b/tests/README.md
@@ -0,0 +1,18 @@
+Run tests in [AutoscalingTests](./AutoscalingTests)
+
+Install dependencies into a virtual environment or system-wide.
+```bash
+python3 -m pip install -r requirements.txt
+```
+
+Run tests with the following commands.
+ +```bash +REMOTE_SERVER_ADDR="http://$(hostname -I | cut -d' ' -f1)/selenium/wd/hub" \ +python3 -m unittest AutoscalingTests.test_scale_chaos +``` + +```bash +REMOTE_SERVER_ADDR="http://$(hostname -I | cut -d' ' -f1)/selenium/wd/hub" \ +python3 -m unittest AutoscalingTests.test_scale_up +``` diff --git a/tests/bootstrap.sh b/tests/bootstrap.sh index 6b4ec608ee..df42805ca1 100755 --- a/tests/bootstrap.sh +++ b/tests/bootstrap.sh @@ -1,4 +1,8 @@ #!/usr/bin/env bash +set -o xtrace + +MATRIX_TESTS=${MATRIX_TESTS:-"default"} + cd tests || true if [ "${CI:-false}" = "false" ]; then @@ -14,10 +18,18 @@ else python3 -m pip install selenium==${BINDING_VERSION} | grep -v 'Requirement already satisfied' fi -python3 -m pip install docker requests chardet | grep -v 'Requirement already satisfied' +python3 -m pip install -r requirements.txt | grep -v 'Requirement already satisfied' -python3 test.py $1 -ret_code=$? +if [ "$1" = "AutoscalingTestsScaleUp" ]; then + python3 -m unittest AutoscalingTests.test_scale_up + ret_code=$? +elif [ "$1" = "AutoScalingTestsScaleChaos" ]; then + python3 -m unittest AutoscalingTests.test_scale_chaos + ret_code=$? +else + python3 test.py $1 + ret_code=$? 
+fi if [ "${CI:-false}" = "false" ]; then deactivate diff --git a/tests/charts/ci/DeploymentAutoscaling-values.yaml b/tests/charts/ci/DeploymentAutoscaling-values.yaml index bf02551cc0..5d4d1e26d9 100644 --- a/tests/charts/ci/DeploymentAutoscaling-values.yaml +++ b/tests/charts/ci/DeploymentAutoscaling-values.yaml @@ -5,7 +5,7 @@ autoscaling: maxReplicaCount: 4 pollingInterval: 10 scaledObjectOptions: - cooldownPeriod: 30 + cooldownPeriod: ${AUTOSCALING_COOLDOWN_PERIOD} terminationGracePeriodSeconds: 360 # Configuration for chrome nodes @@ -47,6 +47,8 @@ chromeNode: value: "1080" - name: TZ value: "Asia/Saigon" + - name: SE_NODE_SESSION_TIMEOUT + value: "3600" readinessProbe: enabled: &readinessProbe true livenessProbe: diff --git a/tests/charts/ci/JobAutoscaling-values.yaml b/tests/charts/ci/JobAutoscaling-values.yaml index 1183442588..e0e02a8cd2 100644 --- a/tests/charts/ci/JobAutoscaling-values.yaml +++ b/tests/charts/ci/JobAutoscaling-values.yaml @@ -22,6 +22,8 @@ chromeNode: value: "1080" - name: TZ value: "Asia/Saigon" + - name: SE_NODE_SESSION_TIMEOUT + value: "3600" readinessProbe: enabled: &readinessProbe false livenessProbe: diff --git a/tests/charts/ci/base-recorder-values.yaml b/tests/charts/ci/base-recorder-values.yaml index 4e75424ba5..bc688ee5a8 100644 --- a/tests/charts/ci/base-recorder-values.yaml +++ b/tests/charts/ci/base-recorder-values.yaml @@ -11,7 +11,7 @@ # AWS_SECRET_ACCESS_KEY: "${AWS_SECRET_ACCESS_KEY}" videoRecorder: - enabled: true + enabled: ${ENABLE_VIDEO_RECORDER} extraVolumes: # - name: videos # persistentVolumeClaim: diff --git a/tests/charts/make/chart_test.sh b/tests/charts/make/chart_test.sh index a5e11cd74e..e1812e18a0 100755 --- a/tests/charts/make/chart_test.sh +++ b/tests/charts/make/chart_test.sh @@ -23,6 +23,8 @@ HUB_CHECKS_INTERVAL=${HUB_CHECKS_INTERVAL:-45} HUB_CHECKS_MAX_ATTEMPTS=${HUB_CHECKS_MAX_ATTEMPTS:-6} WEB_DRIVER_WAIT_TIMEOUT=${WEB_DRIVER_WAIT_TIMEOUT:-120} 
AUTOSCALING_POLL_INTERVAL=${AUTOSCALING_POLL_INTERVAL:-20} +AUTOSCALING_COOLDOWN_PERIOD=${AUTOSCALING_COOLDOWN_PERIOD:-"1800"} +ENABLE_VIDEO_RECORDER=${ENABLE_VIDEO_RECORDER:-"true"} SCALING_STRATEGY=${SCALING_STRATEGY:-"default"} SKIP_CLEANUP=${SKIP_CLEANUP:-"true"} # For debugging purposes, retain the cluster after the test run CHART_CERT_PATH=${CHART_CERT_PATH:-"${CHART_PATH}/certs/tls.crt"} @@ -51,9 +53,10 @@ else fi EXTERNAL_TLS_SECRET_NAME=${EXTERNAL_TLS_SECRET_NAME:-"external-tls-secret-${RESOURCE_ID}"} SELENIUM_ENABLE_MANAGED_DOWNLOADS=${SELENIUM_ENABLE_MANAGED_DOWNLOADS:-"true"} -MAX_SESSIONS_CHROME=${MAX_SESSIONS_CHROME:-"1"} -MAX_SESSIONS_FIREFOX=${MAX_SESSIONS_FIREFOX:-"1"} -MAX_SESSIONS_EDGE=${MAX_SESSIONS_EDGE:-"1"} +TEST_NODE_MAX_SESSIONS=${TEST_NODE_MAX_SESSIONS:-"1"} +MAX_SESSIONS_CHROME=${MAX_SESSIONS_CHROME:-${TEST_NODE_MAX_SESSIONS}} +MAX_SESSIONS_FIREFOX=${MAX_SESSIONS_FIREFOX:-${TEST_NODE_MAX_SESSIONS}} +MAX_SESSIONS_EDGE=${MAX_SESSIONS_EDGE:-${TEST_NODE_MAX_SESSIONS}} TEST_NAME_OVERRIDE=${TEST_NAME_OVERRIDE:-"false"} TEST_PATCHED_KEDA=${TEST_PATCHED_KEDA:-"true"} BASIC_AUTH_EMBEDDED_URL=${BASIC_AUTH_EMBEDDED_URL:-"false"} @@ -116,6 +119,8 @@ export SELENIUM_NAMESPACE=${SELENIUM_NAMESPACE} export TEST_PV_CLAIM_NAME=${TEST_PV_CLAIM_NAME} export HOST_PATH=$(realpath ./tests/videos) export SELENIUM_ENABLE_MANAGED_DOWNLOADS=${SELENIUM_ENABLE_MANAGED_DOWNLOADS} +export AUTOSCALING_COOLDOWN_PERIOD=${AUTOSCALING_COOLDOWN_PERIOD} +export ENABLE_VIDEO_RECORDER=${ENABLE_VIDEO_RECORDER} RECORDER_VALUES_FILE=${TEST_VALUES_PATH}/base-recorder-values.yaml MATRIX_BROWSER_VALUES_FILE=${TEST_VALUES_PATH}/${MATRIX_BROWSER}-values.yaml envsubst < ${RECORDER_VALUES_FILE} > ./tests/tests/base-recorder-values.yaml @@ -189,6 +194,13 @@ if [ "${SELENIUM_GRID_AUTOSCALING}" = "true" ] && [ -n "${SET_MAX_REPLICAS}" ]; " fi +if [ "${SELENIUM_GRID_AUTOSCALING}" = "true" ] && [ "${CLEAR_POD_HISTORY}" = "true" ]; then + HELM_COMMAND_SET_IMAGES="${HELM_COMMAND_SET_IMAGES} \ 
+ --set autoscaling.scaledJobOptions.successfulJobsHistoryLimit=0 \ + --set autoscaling.scaledJobOptions.failedJobsHistoryLimit=0 \ + " +fi + if [ "${CHART_ENABLE_INGRESS_HOSTNAME}" = "true" ]; then if [[ ! $(cat /etc/hosts) == *"${HOSTNAME_ADDRESS}"* ]]; then sudo -- sh -c -e "echo \"$(hostname -I | cut -d' ' -f1) ${HOSTNAME_ADDRESS}\" >> /etc/hosts" @@ -427,6 +439,8 @@ export WEB_DRIVER_WAIT_TIMEOUT=${WEB_DRIVER_WAIT_TIMEOUT} export SELENIUM_GRID_TEST_HEADLESS=${SELENIUM_GRID_TEST_HEADLESS:-"false"} export TEST_DELAY_AFTER_TEST=${TEST_DELAY_AFTER_TEST:-"0"} export TEST_PLATFORMS=${TEST_PLATFORMS} +export TEST_NODE_MAX_SESSIONS=${TEST_NODE_MAX_SESSIONS} +export TEST_AUTOSCALING_ITERATIONS=${TEST_AUTOSCALING_ITERATIONS:-"20"} if [ "${MATRIX_BROWSER}" = "NoAutoscaling" ]; then ./tests/bootstrap.sh NodeFirefox if [ "${TEST_PLATFORMS}" = "linux/amd64" ]; then @@ -440,6 +454,10 @@ elif [ "${MATRIX_TESTS}" = "CDPTests" ]; then if [ "${TEST_PLATFORMS}" = "linux/amd64" ]; then ./tests/CDPTests/bootstrap.sh "MicrosoftEdge" fi +elif [ "${MATRIX_TESTS}" = "AutoscalingTestsScaleUp" ]; then + ./tests/bootstrap.sh ${MATRIX_TESTS} +elif [ "${MATRIX_TESTS}" = "AutoScalingTestsScaleChaos" ]; then + ./tests/bootstrap.sh ${MATRIX_TESTS} else ./tests/bootstrap.sh ${MATRIX_BROWSER} fi diff --git a/tests/requirements.txt b/tests/requirements.txt new file mode 100644 index 0000000000..5f6ad19361 --- /dev/null +++ b/tests/requirements.txt @@ -0,0 +1,5 @@ +docker +requests +chardet +csv2md +selenium