# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

name: PerformanceTests ManyFiles TextIOIT HDFS

on:
  schedule:
    - cron: '10 10/12 * * *'
  workflow_dispatch:

# Setting explicit permissions for the action to avoid the default permissions, which are `write-all` in case of the pull_request_target event
permissions:
  actions: write
  pull-requests: write
  checks: write
  contents: read
  deployments: read
  id-token: none
  issues: write
  discussions: read
  packages: read
  pages: read
  repository-projects: read
  security-events: read
  statuses: read

# This allows a subsequently queued workflow run to interrupt previous runs
concurrency:
  group: '${{ github.workflow }} @ ${{ github.event.issue.number || github.sha || github.head_ref || github.ref }}-${{ github.event.schedule || github.event.comment.id || github.event.sender.login }}'
  cancel-in-progress: true
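# For example, a scheduled run resolves the group above to '<workflow> @ <sha>-<cron string>'
# (an illustration derived from the expression, not an exhaustive case list), so a newly queued
# scheduled run cancels its predecessor while other trigger contexts fall into distinct groups.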

env:
  GRADLE_ENTERPRISE_ACCESS_KEY: ${{ secrets.GE_ACCESS_TOKEN }}
  GRADLE_ENTERPRISE_CACHE_USERNAME: ${{ secrets.GE_CACHE_USERNAME }}
  GRADLE_ENTERPRISE_CACHE_PASSWORD: ${{ secrets.GE_CACHE_PASSWORD }}
  INFLUXDB_USER: ${{ secrets.INFLUXDB_USER }}
  INFLUXDB_USER_PASSWORD: ${{ secrets.INFLUXDB_USER_PASSWORD }}

jobs:
  beam_PerformanceTests_ManyFiles_TextIOIT_HDFS:
    if: |
      github.event_name == 'workflow_dispatch' ||
      github.event_name == 'schedule' ||
      github.event.comment.body == 'Run Java ManyFilesTextIO Performance Test HDFS'
    runs-on: [self-hosted, ubuntu-20.04, main]
    timeout-minutes: 100
    name: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
    strategy:
      matrix:
        job_name: ["beam_PerformanceTests_ManyFiles_TextIOIT_HDFS"]
        job_phrase: ["Run Java ManyFilesTextIO Performance Test HDFS"]
    steps:
      - uses: actions/checkout@v4
      - name: Setup repository
        uses: ./.github/actions/setup-action
        with:
          comment_phrase: ${{ matrix.job_phrase }}
          github_token: ${{ secrets.GITHUB_TOKEN }}
          github_job: ${{ matrix.job_name }} (${{ matrix.job_phrase }})
      - name: Setup environment
        uses: ./.github/actions/setup-environment-action
      - name: Authenticate on GCP
        id: auth
        uses: google-github-actions/auth@v1
        with:
          credentials_json: ${{ secrets.GCP_SA_KEY }}
          project_id: ${{ secrets.GCP_PROJECT_ID }}
      - name: Set k8s access
        uses: ./.github/actions/setup-k8s-access
        with:
          k8s_namespace: ${{ matrix.job_name }}-${{ github.run_id }}
      - name: Install Hadoop
        id: install_hadoop
        run: |
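          # Deploy the multi-datanode HDFS cluster and block until the hadoop service
          # is assigned an external LoadBalancer IP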
          kubectl apply -f ${{ github.workspace }}/.test-infra/kubernetes/hadoop/LargeITCluster/hdfs-multi-datanode-cluster.yml
          kubectl wait svc/hadoop --for=jsonpath='{.status.loadBalancer.ingress[0].ip}' --timeout=120s
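          # Expose the LoadBalancer IP as a step output so later steps can read it
          # via steps.install_hadoop.outputs.hadoop_IP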
          loadbalancer_IP=$(kubectl get svc hadoop -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
          echo "hadoop_IP=$loadbalancer_IP" >> "$GITHUB_OUTPUT"
      - name: Prepare test arguments
        uses: ./.github/actions/test-arguments-action
        with:
          test-type: performance
          test-language: java
          argument-file-paths: |
            ${{ github.workspace }}/.github/workflows/performance-tests-pipeline-options/manyFiles_TextIOIT_HDFS.txt
          arguments: |
            --filenamePrefix=hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000/TEXTIO_IT_
            --hdfsConfiguration=[{\\\"fs.defaultFS\\\":\\\"hdfs://${{ steps.install_hadoop.outputs.hadoop_IP }}:9000\\\",\\\"dfs.replication\\\":1}]
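      # The triple-escaped quotes above are assumed to survive the intermediate shell and
      # Gradle layers, so --hdfsConfiguration ultimately receives a well-formed JSON array.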
      # The env variables are created and populated in the test-arguments-action as "<github.job>_test_arguments_<argument_file_paths_index>"
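      # For this job that is beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1,
      # consumed by the integrationTest step below.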
      - name: Run integrationTest
        uses: ./.github/actions/gradle-command-self-hosted-action
        with:
          gradle-command: :sdks:java:io:file-based-io-tests:integrationTest
          arguments: |
            --tests org.apache.beam.sdk.io.text.TextIOIT \
            --info \
            -Dfilesystem=hdfs \
            -DintegrationTestRunner=dataflow \
            -DintegrationTestPipelineOptions='[${{ env.beam_PerformanceTests_ManyFiles_TextIOIT_HDFS_test_arguments_1 }}]'