Merge pull request #2163 from FedML-AI/alexleung/dev_v070_for_refactor

Adjust the design of FedML Python Agent to a decentralized architecture that supports Launch Master, Launch Slave, Deploy Master, and Deploy Slave at the same time.
FedML-AI · Jun 28, 2024 · a5bbcd2 · a5bbcd2
2 parents 7193577 + a932082
commit a5bbcd2
Show file tree

Hide file tree

Showing 93 changed files with 2,120 additions and 741 deletions.
diff --git a/.github/workflows/CI_build.yml b/.github/workflows/CI_build.yml
@@ -0,0 +1,47 @@
+# CI-build: installs the package under ./python in editable mode, then logs in, on each self-hosted runner of the matrix.
+name: CI-build
+
+on:
+  # Manual runs from the Actions tab.
+  workflow_dispatch:
+  # Pull requests that target the listed branches.
+  pull_request:
+    branches: [master, dev/v0.7.0]
+  # Daily run at 10:00 (GitHub evaluates cron schedules in UTC).
+  schedule:
+    - cron: "0 10 */1 * *"
+
+jobs:
+  build:
+    timeout-minutes: 5
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [Linux, Windows]
+        arch: [X64]
+        python-version:
+          - python3.8
+          - python3.9
+          - python3.10
+          - python3.11
+    # Each job is routed to a runner that carries both labels.
+    runs-on:
+      - "${{ matrix.python-version }}"
+      - "${{ matrix.os }}"
+    steps:
+      - name: Checkout fedml
+        uses: actions/checkout@v3
+      - name: pip_install
+        run: |
+          cd python
+          pip install -e ./
+      # API_KEY is not defined in this file; presumably the runner environment provides it (confirm).
+      - name: login
+        run: |
+          fedml logout
+          fedml login $API_KEY
+      # Placeholder step: it only prints a message and does not invoke a linter.
+      - name: pylint
+        run: |
+          cd python
+          echo "Pylint has been run successfully!"
diff --git a/.github/workflows/CI_deploy.yml b/.github/workflows/CI_deploy.yml
@@ -0,0 +1,43 @@
+# CI-deploy: installs the package under ./python in editable mode and runs the deploy test on each self-hosted runner of the matrix.
+name: CI-deploy
+
+# Controls when the workflow will run
+on:
+  # Daily run at 10:00 (GitHub evaluates cron schedules in UTC)
+  schedule:
+    - cron: "0 10 */1 * *"
+  # Pull requests that target the listed branches
+  pull_request:
+    branches: [ master,  dev/v0.7.0 ]
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# A workflow run is made up of one or more jobs that can run sequentially or in parallel
+jobs:
+  deploy:
+    # Each job is routed to a runner that carries both labels
+    runs-on: ["${{ matrix.python-version }}","${{ matrix.os }}"]
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ Linux, Windows ]
+        arch: [X64]
+        python-version: ['python3.8', 'python3.9', 'python3.10', 'python3.11']
+
+    timeout-minutes: 5
+    steps:
+      - name: Checkout fedml
+        uses: actions/checkout@v3
+
+      - name: pip_install
+        run: |
+          cd python
+          pip install -e ./
+
+      # The test runs before the echo so the success message is printed only after the test has run
+      - name: serving_job_in_test_env
+        run: |
+          cd python
+          python tests/test_deploy/test_deploy.py
+          echo "Serving example has been tested successfully!"
diff --git a/.github/workflows/CI_federate.yml b/.github/workflows/CI_federate.yml
@@ -0,0 +1,42 @@
+# CI-federate: installs the package under ./python in editable mode and runs the federate test script on each self-hosted runner of the matrix.
+name: CI-federate
+
+on:
+  # Manual runs from the Actions tab.
+  workflow_dispatch:
+  # Pull requests that target the listed branches.
+  pull_request:
+    branches: [master, dev/v0.7.0]
+  # Daily run at 10:00 (GitHub evaluates cron schedules in UTC).
+  schedule:
+    - cron: "0 10 */1 * *"
+
+jobs:
+  federate:
+    timeout-minutes: 5
+    # Each job is routed to a runner that carries both labels.
+    runs-on:
+      - "${{ matrix.python-version }}"
+      - "${{ matrix.os }}"
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [Linux, Windows]
+        arch: [X64]
+        python-version:
+          - python3.8
+          - python3.9
+          - python3.10
+          - python3.11
+    steps:
+      - name: Checkout fedml
+        uses: actions/checkout@v3
+      - name: pip_install
+        run: |
+          cd python
+          pip install -e ./
+      - name: federate_job_in_test_env
+        run: |
+          cd python
+          bash tests/test_federate/test_federate.sh
+          echo "Federate example has been tested successfully!"
diff --git a/.github/workflows/CI_launch.yml b/.github/workflows/CI_launch.yml
@@ -0,0 +1,43 @@
+# CI-launch: installs the package under ./python in editable mode and runs the launch test on each self-hosted runner of the matrix.
+name: CI-launch
+
+on:
+  # Manual runs from the Actions tab.
+  workflow_dispatch:
+  # Pull requests that target the listed branches.
+  pull_request:
+    branches: [master, dev/v0.7.0]
+  # Daily run at 10:00 (GitHub evaluates cron schedules in UTC).
+  schedule:
+    - cron: "0 10 */1 * *"
+
+jobs:
+  launch:
+    timeout-minutes: 5
+    # Each job is routed to a runner that carries both labels.
+    runs-on:
+      - "${{ matrix.python-version }}"
+      - "${{ matrix.os }}"
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [Linux, Windows]
+        arch: [X64]
+        python-version:
+          - python3.8
+          - python3.9
+          - python3.10
+          - python3.11
+
+    steps:
+      - name: Checkout fedml
+        uses: actions/checkout@v3
+      - name: pip_install
+        run: |
+          cd python
+          pip install -e ./
+      - name: launch_job_in_test_env
+        run: |
+          cd python
+          python tests/test_launch/test_launch.py
+          echo "Launch example has been tested successfully!" 
diff --git a/.github/workflows/CI_train.yml b/.github/workflows/CI_train.yml
@@ -0,0 +1,42 @@
+# CI-train: installs the package under ./python in editable mode and runs the train test on each self-hosted runner of the matrix.
+name: CI-train
+
+on:
+  # Manual runs from the Actions tab.
+  workflow_dispatch:
+  # Pull requests that target the listed branches.
+  pull_request:
+    branches: [master, dev/v0.7.0]
+  # Daily run at 10:00 (GitHub evaluates cron schedules in UTC).
+  schedule:
+    - cron: "0 10 */1 * *"
+
+jobs:
+  train:
+    timeout-minutes: 5
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [Linux, Windows]
+        arch: [X64]
+        python-version:
+          - python3.8
+          - python3.9
+          - python3.10
+          - python3.11
+    # Each job is routed to a runner that carries both labels.
+    runs-on:
+      - "${{ matrix.python-version }}"
+      - "${{ matrix.os }}"
+    steps:
+      - name: Checkout fedml
+        uses: actions/checkout@v3
+      - name: pip_install
+        run: |
+          cd python
+          pip install -e ./
+      - name: training_job_in_test_env
+        run: |
+          cd python
+          python tests/test_train/test_train.py
+          echo "Train example has been tested successfully!" 
diff --git a/.github/workflows/README.md b/.github/workflows/README.md
@@ -0,0 +1,97 @@
+# 1. Design
+
+![Design](image.png)
+
+##  Design principles
+
+The CI tests should be comprehensive yet cover only the typical scenarios, so that each one can finish within 5 minutes.
+
+# 2. Register Self-Hosted Runners
+
+## 2.1 Linux Runners
+
+### Step1: Build linux images
+
+Build all the Linux images for the self-hosted runners.
+```
+cd registry-runners
+bash build_linux_runners.sh
+```
+
+### Step2: Specify the token and key.
+Find your GitHub runner token and your test-account API key.
+
+To get the value for the `YourGitHubRunnerToken` argument, navigate to `Settings -> Actions -> Runners -> New self-hosted runner`.
+
+In the Configure section, you will find a line similar to the following; the value of `--token` is your `YourGitHubRunnerToken`:
+`./config.sh --url https://github.com/FedML-AI/FedML --token AXRYPL6G2VHVGDFDQQS5XA3ELYI6M`
+
+### Step3: Register all the runners.
+Register them by running the `run_linux_runners.sh` script:
+```
+bash run_linux_runners.sh [YourGitRepo] [YourGitHubRunnerToken] [YourTestAccountApiKey]
+```
+for example
+```
+bash run_linux_runners.sh FedML-AI/FedML AXRYPLZLZN6XVJB3BAIXSP3EMFC7U 11215dkevvdkegged
+```
+### Step4: Verify Success
+
+Check that all the runners are registered successfully: navigate to `Settings -> Actions -> Runners` and confirm that all your runners are active.
+
+## 2.2 Windows Runners
+
+### Step1: Install Anaconda packages
+Install Anaconda or Miniconda on a Windows machine. Anaconda and Miniconda can manage your Python environments.
+
+### Step2: Create Python environments
+Create 4 Python environments named `python38`, `python39`, `python310` and `python311` for the different runners.
+Specify the Python version to install.
+For example:
+```
+conda create -n python38 python==3.8
+```
+### Step3: Create directories 
+Create 4 directories named `actions-runner-python38`, `actions-runner-python39`, `actions-runner-python310` and `actions-runner-python311` for the different runners.
+
+### Step4: Install the latest runner package. 
+Follow the instructions shown under `Settings -> Actions -> Runners -> New self-hosted runner` to add a new Windows runner. Note that you only need to download and extract the files into the directories created in Step 3. Configuration and running will be done through a script later.
+
+### Step5: Register all the runners.
+Run the script `./registry-runners/windows.ps1` to register all the runners with your GitHub repository. Replace the variables `$REPO`, `$ACCESS_TOKEN` and `$WORKPLACE` with actual values. Note that you can get your `$ACCESS_TOKEN` from `Settings -> Actions -> Runners -> New self-hosted runner`.
+In the Configure section, you will find a line similar to `./config.sh --url https://github.com/FedML-AI/FedML --token AXRYPL6G2VHVGDFDQQS5XA3ELYI6M`; the value of `--token` is your `$ACCESS_TOKEN`.
+
+### Step6: Verify Success
+Check that the runners are registered successfully by navigating to `Settings -> Actions -> Runners`. Make sure that all your runners are active.
+
+## 2.3 Mac Runners
+
+# 3. Bind Test Machines
+
+Bind the actual machine to run the test training job. Follow this document to bind your test machines.
+https://docs.tensoropera.ai/share-and-earn
+
+Note that we need to bind our machines to the test environment.
+
+Specify the computing resource type to which you have bound your machines. Your job will be scheduled to that machine.
+
+# 4. Trigger
+
+Opening a pull request against `master` or `dev/v0.7.0` triggers all tests automatically.
+
+Run a single test on a specific branch from the GitHub Actions tab.
+
+Schedule daily runs at a specific time by configuring your workflow YAML. You can check the results in the GitHub Actions tab.
+
+# 5. Add a new CI test
+
+To add a CI test for a business area that is not covered yet, create a new workflow YAML file modeled on the existing ones, such as CI_launch.yml or CI_train.yml.
+
+To add a new CI test to an existing business area, place your test at python/tests/test_{business}/test_file.py and make sure that the corresponding workflow YAML runs that Python test script.
+
+Ensuring your workflow YAML is configured correctly will enable it to run the new test automatically.
+
+# 6. TODO
+
+Implement the Mac runners.
+
diff --git a/...lows/build_wheels_and_releases.yml-backup → ...ated/build_wheels_and_releases.yml-backup b/...lows/build_wheels_and_releases.yml-backup → ...ated/build_wheels_and_releases.yml-backup
diff --git a/.github/workflows/codeql-analysis.yml → .../workflows/deprecated/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml → .../workflows/deprecated/codeql-analysis.yml
diff --git a/.github/workflows/full_e2e_test.yml-bakcup → ...flows/deprecated/full_e2e_test.yml-bakcup b/.github/workflows/full_e2e_test.yml-bakcup → ...flows/deprecated/full_e2e_test.yml-bakcup
diff --git a/.github/workflows/pylint.yml → .github/workflows/deprecated/pylint.yml b/.github/workflows/pylint.yml → .github/workflows/deprecated/pylint.yml
@@ -28,13 +28,16 @@ jobs:
           echo ${{ steps.extract_branch.outputs.branch }}
           if [[ ${{ steps.extract_branch.outputs.branch }} == "master" ]]; then
             echo "running on master"
-            path=/home/actions-runner/fedml-master
+            path=/home/fedml/FedML
             cd $path
+            git pull
             echo "dir=$path" >> $GITHUB_OUTPUT
           else
             echo "running on dev"
-            path=/home/actions-runner/fedml-dev
+            path=/home/fedml/FedML
             cd $path
+            git pull
+            git checkout ${{ steps.extract_branch.outputs.branch }}
             echo "dir=$path" >> $GITHUB_OUTPUT
           fi
       - name: Analysing the code with pylint

diff --git a/.github/workflows/deprecated/python-package-conda.yml b/.github/workflows/deprecated/python-package-conda.yml
@@ -0,0 +1,34 @@
+# Updates the conda base environment from environment.yml, then lints with flake8 and runs pytest on every push.
+name: Python Package using Conda
+
+on:
+  - push
+jobs:
+  build-linux:
+    strategy:
+      max-parallel: 5
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python 3.10
+        uses: actions/setup-python@v3
+        with:
+          python-version: "3.10"
+      - name: Add conda to system path
+        run: |
+          # $CONDA is an environment variable pointing to the root of the miniconda directory
+          echo $CONDA/bin >> $GITHUB_PATH
+      - name: Install dependencies
+        run: |
+          conda env update --file environment.yml --name base
+      - name: Lint with flake8
+        run: |
+          conda install flake8
+          # stop the build if there are Python syntax errors or undefined names
+          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
+          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
+          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+      - name: Test with pytest
+        run: |
+          conda install pytest
+          pytest
diff --git a/.github/workflows/runner.md → .github/workflows/deprecated/runner.md b/.github/workflows/runner.md → .github/workflows/deprecated/runner.md