support habana fp8 test in CI #1909

Merged: 10 commits, Jul 10, 2024

Changes from all commits
4 changes: 2 additions & 2 deletions .azure-pipelines/scripts/install_nc.sh
@@ -2,10 +2,10 @@

 echo -e "\n Install Neural Compressor ... "
 cd /neural-compressor
-if [[ $1 = *"3x_pt" ]]; then
+if [[ $1 = *"3x_pt"* ]]; then
     python -m pip install --no-cache-dir -r requirements_pt.txt
     python setup.py pt bdist_wheel
-    pip install dist/neural_compressor*.whl --force-reinstall
+    pip install --no-deps dist/neural_compressor*.whl --force-reinstall
 elif [[ $1 = *"3x_tf"* ]]; then
     python -m pip install --no-cache-dir -r requirements_tf.txt
     python setup.py tf bdist_wheel
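The key fix here is the added trailing `*`: bash's `[[ string = pattern ]]` glob match is anchored at both ends, so the old `*"3x_pt"` pattern only matches arguments that *end* in `3x_pt` and would miss the new `3x_pt_fp8` mode. A standalone sketch of the difference (not part of the PR):

```shell
#!/bin/bash
# Bash [[ x = pattern ]] does glob matching anchored at both ends of the string.
arg="3x_pt_fp8"

# Old pattern: requires the argument to END with "3x_pt" -> misses 3x_pt_fp8.
if [[ $arg = *"3x_pt" ]]; then echo "old: match"; else echo "old: no match"; fi

# New pattern: "3x_pt" may appear anywhere -> also matches 3x_pt_fp8.
if [[ $arg = *"3x_pt"* ]]; then echo "new: match"; else echo "new: no match"; fi
```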
3 changes: 2 additions & 1 deletion .azure-pipelines/scripts/ut/3x/collect_log_3x.sh
@@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor
 git fetch
 git checkout master
 rm -rf build dist *egg-info
-echo y | pip uninstall neural_compressor_${1}
+binary_index="${1%_fp8}"
+echo y | pip uninstall neural_compressor_${binary_index}
 cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1}

 coverage erase
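`${1%_fp8}` is POSIX parameter expansion: it strips the shortest trailing match of `_fp8` from `$1`, so the `3x_pt_fp8` pipeline argument maps back to the plain `3x_pt` wheel name when uninstalling, while arguments without the suffix pass through unchanged. A standalone sketch:

```shell
#!/bin/bash
# "${var%suffix}" removes a trailing suffix if present; otherwise leaves var alone.
for arg in 3x_pt_fp8 3x_pt 3x_tf; do
  binary_index="${arg%_fp8}"
  echo "${arg} -> ${binary_index}"
done
# prints:
#   3x_pt_fp8 -> 3x_pt
#   3x_pt -> 3x_pt
#   3x_tf -> 3x_tf
```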
15 changes: 15 additions & 0 deletions .azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
@@ -0,0 +1,15 @@
[run]
branch = True

[report]
include =
    */neural_compressor/torch/algorithms/habana_fp8/*
    */neural_compressor/torch/amp/*
exclude_lines =
    pragma: no cover
    raise NotImplementedError
    raise TypeError
    if self.device == "gpu":
    if device == "gpu":
    except ImportError:
    except Exception as e:
1 change: 1 addition & 0 deletions .azure-pipelines/scripts/ut/3x/run_3x_pt.sh
@@ -15,6 +15,7 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
 cd /neural-compressor/test/3x || exit 1
 rm -rf tensorflow
 rm -rf onnxrt
+rm -rf torch/algorithms/fp8_quant

 LOG_DIR=/neural-compressor/log_dir
 mkdir -p ${LOG_DIR}
35 changes: 35 additions & 0 deletions .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh
@@ -0,0 +1,35 @@
#!/bin/bash
python -c "import neural_compressor as nc"
test_case="run 3x Torch Habana FP8"
echo "${test_case}"

# install requirements
echo "set up UT env..."
sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt
pip install -r /neural-compressor/test/3x/torch/requirements.txt
pip install git+https://github.com/HabanaAI/[email protected]
pip install pytest-cov
pip install pytest-html
pip list

export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8
inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])')
cd /neural-compressor/test/3x || exit 1

LOG_DIR=/neural-compressor/log_dir
mkdir -p ${LOG_DIR}
ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log
pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a ${ut_log_name}

cp report.html ${LOG_DIR}/

if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then
    echo "Find errors in pytest case, please check the output..."
    echo "Please search for '== FAILURES ==' or '== ERRORS =='"
    exit 1
fi

# if ut pass, collect the coverage file into artifacts
cp .coverage ${LOG_DIR}/.coverage

echo "UT finished successfully! "
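The pass/fail gate at the end of this script is purely log-based: it greps the captured pytest output for the `== FAILURES ==` / `== ERRORS ==` banners and for at least one ` passed` summary line. A self-contained sketch of the same check against synthetic logs (the temp files and `check_log` helper are made up for the demo):

```shell
#!/bin/bash
# Returns 0 (pass) only if the log has no FAILURES/ERRORS banner AND
# reports at least one passed test -- same logic as run_3x_pt_fp8.sh.
check_log() {
  local log=$1
  if [ $(grep -c '== FAILURES ==' "$log") != 0 ] ||
     [ $(grep -c '== ERRORS ==' "$log") != 0 ] ||
     [ $(grep -c ' passed' "$log") == 0 ]; then
    return 1
  fi
  return 0
}

good=$(mktemp); echo "====== 5 passed in 2.31s ======" > "$good"
bad=$(mktemp);  printf '==== FAILURES ====\n1 failed, 4 passed\n' > "$bad"

check_log "$good" && echo "good log: PASS"   # prints "good log: PASS"
check_log "$bad"  || echo "bad log: FAIL"    # prints "bad log: FAIL"
```

Note that `grep -c` prints `0` (rather than nothing) when there is no match, so the unquoted `$(...)` comparisons are safe here.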
44 changes: 30 additions & 14 deletions .azure-pipelines/template/docker-template.yml
@@ -16,6 +16,9 @@ parameters:
 - name: repo
   type: string
   default: "https://github.com/intel/neural-compressor"
+- name: imageSource
+  type: string
+  default: "build"

 steps:
 - task: Bash@3
@@ -24,7 +27,7 @@
     script: |
       docker ps -a
       if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then
-        docker start $(docker ps -aq)
+        docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}")
         echo "remove left files through container ..."
         docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true"
       fi
@@ -57,19 +60,25 @@
     git checkout master
   displayName: "Checkout out master"

-- script: |
-    if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
-      docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
-    fi
-    docker images | grep -i ${{ parameters.repoName }}
-    if [[ $? -ne 0 ]]; then
-      echo "NO Such Repo"
-      exit 1
-    fi
-  displayName: "Build develop docker image"
+- ${{ if eq(parameters.imageSource, 'build') }}:
+  - script: |
+      if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then
+        docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} .
+      fi
+      docker images | grep -i ${{ parameters.repoName }}
+      if [[ $? -ne 0 ]]; then
+        echo "NO Such Repo"
+        exit 1
+      fi
+    displayName: "Build develop docker image"
+
+- ${{ if eq(parameters.imageSource, 'pull') }}:
+  - script: |
+      docker pull vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest
+    displayName: "Pull habana docker image"

 - script: |
-    docker stop $(docker ps -aq)
+    docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}")
     docker rm -vf ${{ parameters.containerName }} || true
     env | sort
   displayName: "Clean docker container"
@@ -79,8 +88,15 @@
   inputs:
     targetType: "inline"
     script: |
-      docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
-        -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }}
+      if [[ "${{ parameters.imageSource }}" == "build" ]]; then
+        docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
+          -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \
+          ${{ parameters.repoName }}:${{ parameters.repoTag }}
+      else
+        docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
+          --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
+          -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest
+      fi
       echo "Show the container list after docker run ... "
       docker ps -a
     displayName: "Docker run - ${{ parameters.containerName }} Container"
4 changes: 4 additions & 0 deletions .azure-pipelines/template/ut-template.yml
@@ -17,6 +17,9 @@ parameters:
 - name: utContainerName
   type: string
   default: "utTest"
+- name: imageSource
+  type: string
+  default: "build"

 steps:
 - template: docker-template.yml
@@ -27,6 +30,7 @@
     dockerFileName: "Dockerfile"
     containerName: ${{ parameters.utContainerName }}
     repo: ${{ parameters.repo }}
+    imageSource: ${{ parameters.imageSource }}

 - script: |
     docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \
106 changes: 106 additions & 0 deletions .azure-pipelines/ut-3x-pt-fp8.yml
@@ -0,0 +1,106 @@
trigger: none

pr:
  autoCancel: true
  drafts: false
  branches:
    include:
      - master
  paths:
    include:
      - neural_compressor/common
      - setup.py
      - requirements_pt.txt
      - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh

pool: GAUDI

variables:
  IMAGE_NAME: "neural-compressor"
  IMAGE_TAG: "py310"
  UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir
  DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir
  ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8"
  REPO: $(Build.Repository.Uri)

stages:
- stage: Torch_habana
  displayName: Torch 3x Habana FP8
  dependsOn: []
  jobs:
  - job:
    displayName: Torch 3x Habana FP8
    steps:
    - template: template/ut-template.yml
      parameters:
        imageSource: "pull"
        dockerConfigName: "commonDockerConfig"
        utScriptFileName: "3x/run_3x_pt_fp8"
        uploadPath: $(UPLOAD_PATH)
        utArtifact: "ut_3x_pt_fp8"

- stage: Torch_habana_baseline
  displayName: Torch 3x Habana FP8 baseline
  dependsOn: []
  jobs:
  - job:
    displayName: Torch 3x Habana FP8 baseline
    steps:
    - template: template/ut-template.yml
      parameters:
        imageSource: "pull"
        dockerConfigName: "gitCloneDockerConfig"
        utScriptFileName: "3x/run_3x_pt_fp8"
        uploadPath: $(UPLOAD_PATH)
        utArtifact: "ut_3x_pt_fp8"

- stage: Coverage
  displayName: "Coverage Compare"
  pool:
    vmImage: "ubuntu-latest"
  dependsOn: [Torch_habana, Torch_habana_baseline]
  jobs:
  - job: CollectDatafiles
    steps:
    - script: |
        if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then
          docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} .
        fi
        docker images | grep -i ${IMAGE_NAME}
        if [[ $? -ne 0 ]]; then
          echo "NO Such Repo"
          exit 1
        fi
      displayName: "Build develop docker image"

    - task: DownloadPipelineArtifact@2
      inputs:
        artifact:
        patterns: '*_coverage/.coverage'
        path: $(DOWNLOAD_PATH)

    - script: |
        echo "--- create container ---"
        docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash
        echo "--- docker ps ---"
        docker ps
        echo "--- collect logs ---"
        docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \
          && bash install_nc.sh 3x_pt_fp8 \
          && bash ut/3x/collect_log_3x.sh 3x_pt_fp8"
      displayName: "Collect UT Coverage"

    - task: PublishPipelineArtifact@1
      condition: succeededOrFailed()
      inputs:
        targetPath: $(UPLOAD_PATH)
        artifact: $(ARTIFACT_NAME)
        publishLocation: "pipeline"

    - task: Bash@3
      condition: always()
      inputs:
        targetType: "inline"
        script: |
          docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true"
      displayName: "Docker clean up"
1 change: 1 addition & 0 deletions neural_compressor/torch/utils/environ.py
@@ -46,6 +46,7 @@ def is_package_available(package_name):
 ## check hpex
 if is_package_available("habana_frameworks"):
     _hpex_available = True
+    import habana_frameworks.torch.hpex  # pylint: disable=E0401
 else:
     _hpex_available = False
56 changes: 56 additions & 0 deletions test/3x/torch/algorithms/fp8_quant/test_basic.py
@@ -0,0 +1,56 @@
import os
import sys
import time

import habana_frameworks.torch.core as htcore
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms


class Net(nn.Module):
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(784, 256)
self.fc2 = nn.Linear(256, 64)
self.fc3 = nn.Linear(64, 10)

def forward(self, x):
out = x.view(-1, 28 * 28)
out = F.relu(self.fc1(out))
out = F.relu(self.fc2(out))
out = self.fc3(out)
out = F.log_softmax(out, dim=1)
return out


def test_hpu():
model = Net()
model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth"
model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth"
os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path))
checkpoint = torch.load(model_path)
model.load_state_dict(checkpoint)

model = model.eval()

model = model.to("hpu")

transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))])

data_path = "./data"
test_kwargs = {"batch_size": 32}
dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform)
test_loader = torch.utils.data.DataLoader(dataset1, **test_kwargs)

correct = 0
for batch_idx, (data, label) in enumerate(test_loader):
data = data.to("hpu")
output = model(data)
htcore.mark_step()
correct += output.max(1)[1].eq(label).sum()

accuracy = 100.0 * correct / (len(test_loader) * 32)
assert accuracy > 90