From 78f0fb6d6ee13119464593745afc9f4f40261da5 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 17:54:59 +0800 Subject: [PATCH 1/9] support habana fp8 test Signed-off-by: chensuyue --- .azure-pipelines/scripts/install_nc.sh | 2 +- .../scripts/ut/3x/collect_log_3x.sh | 3 +- .../scripts/ut/3x/coverage.3x_pt_fp8 | 15 +++ .../scripts/ut/3x/run_3x_pt_fp8.sh | 35 ++++++ .azure-pipelines/template/docker-template.yml | 40 +++++-- .azure-pipelines/template/ut-template.yml | 4 + .azure-pipelines/ut-3x-pt-fp8.yml | 106 ++++++++++++++++++ neural_compressor/torch/utils/environ.py | 1 + .../torch/algorithms/fp8_quant/test_basic.py | 60 ++++++++++ 9 files changed, 252 insertions(+), 14 deletions(-) create mode 100644 .azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 create mode 100644 .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh create mode 100644 .azure-pipelines/ut-3x-pt-fp8.yml create mode 100644 test/3x/torch/algorithms/fp8_quant/test_basic.py diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index d3cee07609c..05d4e67055d 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -2,7 +2,7 @@ echo -e "\n Install Neural Compressor ... " cd /neural-compressor -if [[ $1 = *"3x_pt" ]]; then +if [[ $1 = *"3x_pt"* ]]; then python -m pip install --no-cache-dir -r requirements_pt.txt python setup.py pt bdist_wheel pip install dist/neural_compressor*.whl --force-reinstall diff --git a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh index 386ec397c81..03f4fd02dbf 100644 --- a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh +++ b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh @@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor git fetch git checkout master rm -rf build dist *egg-info -echo y | pip uninstall neural_compressor_${1} +binary_index="${1%_fp8}" +echo y | pip uninstall neural_compressor_${binary_index} cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1} coverage erase diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 new file mode 100644 index 00000000000..f1bf27d8da3 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 @@ -0,0 +1,15 @@ +[run] +branch = True + +[report] +include = + */neural_compressor/torch/algorithms/habana_fp8/* + */neural_compressor/torch/amp/* +exclude_lines = + pragma: no cover + raise NotImplementedError + raise TypeError + if self.device == "gpu": + if device == "gpu": + except ImportError: + except Exception as e: \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh new file mode 100644 index 00000000000..d2aef0c3045 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh @@ -0,0 +1,35 @@ +#!/bin/bash +python -c "import neural_compressor as nc" +test_case="run 3x Torch Habana FP8" +echo "${test_case}" + +# install requirements +echo "set up UT env..." 
+sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt +pip install -r /neural-compressor/test/3x/torch/requirements.txt +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0 +pip install pytest-cov +pip install pytest-html +pip list + +export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 +inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])') +cd /neural-compressor/test/3x || exit 1 + +LOG_DIR=/neural-compressor/log_dir +mkdir -p ${LOG_DIR} +ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log +pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a ${ut_log_name} + +cp report.html ${LOG_DIR}/ + +if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then + echo "Find errors in pytest case, please check the output..." + echo "Please search for '== FAILURES ==' or '== ERRORS =='" + exit 1 +fi + +# if ut pass, collect the coverage file into artifacts +cp .coverage ${LOG_DIR}/.coverage + +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index e7b563bcea7..0bba5ae9e20 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -16,6 +16,9 @@ parameters: - name: repo type: string default: "https://github.com/intel/neural-compressor" + - name: imageSource + type: string + default: "build" steps: - task: Bash@3 @@ -57,16 +60,22 @@ steps: git checkout master displayName: "Checkout out master" - - script: | - if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then - docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . - fi - docker images | grep -i ${{ parameters.repoName }} - if [[ $? -ne 0 ]]; then - echo "NO Such Repo" - exit 1 - fi - displayName: "Build develop docker image" + - ${{ if eq(parameters.imageSource, 'build') }}: + - script: | + if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . + fi + docker images | grep -i ${{ parameters.repoName }} + if [[ $? 
-ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - ${{ if eq(parameters.imageSource, 'pull') }}: + - script: | + docker pull vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + displayName: "Pull habana docker image" - script: | docker stop $(docker ps -aq) @@ -79,8 +88,15 @@ steps: inputs: targetType: "inline" script: | - docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ - -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }} + if [[ "${{ parameters.imageSource }}" == "build" ]]; then + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \ + ${{ parameters.repoName }}:${{ parameters.repoTag }} + else + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + fi echo "Show the container list after docker run ... " docker ps -a displayName: "Docker run - ${{ parameters.containerName }} Container" diff --git a/.azure-pipelines/template/ut-template.yml b/.azure-pipelines/template/ut-template.yml index b7fecacd3d7..d8908d22a35 100644 --- a/.azure-pipelines/template/ut-template.yml +++ b/.azure-pipelines/template/ut-template.yml @@ -17,6 +17,9 @@ parameters: - name: utContainerName type: string default: "utTest" + - name: imageSource + type: string + default: "build" steps: - template: docker-template.yml @@ -27,6 +30,7 @@ steps: dockerFileName: "Dockerfile" containerName: ${{ parameters.utContainerName }} repo: ${{ parameters.repo }} + imageSource: ${{ parameters.imageSource }} - script: | docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \ diff --git a/.azure-pipelines/ut-3x-pt-fp8.yml b/.azure-pipelines/ut-3x-pt-fp8.yml new file mode 100644 index 00000000000..4fa440fea8d --- /dev/null +++ b/.azure-pipelines/ut-3x-pt-fp8.yml @@ -0,0 +1,106 @@ +trigger: none + +pr: + autoCancel: true + drafts: false + branches: + include: + - master + paths: + include: + - neural_compressor/common + - setup.py + - requirements_pt.txt + - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh + +pool: GAUDI + +variables: + IMAGE_NAME: "neural-compressor" + IMAGE_TAG: "py310" + UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir + DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir + ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8" + REPO: $(Build.Repository.Uri) + +stages: + - stage: Torch_habana + displayName: Torch 3x Habana FP8 + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" + dockerConfigName: "commonDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Torch_habana_baseline + displayName: Torch 3x Habana FP8 baseline + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 baseline + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" 
+ dockerConfigName: "gitCloneDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Coverage + displayName: "Coverage Compare" + pool: + vmImage: "ubuntu-latest" + dependsOn: [Torch_habana, Torch_habana_baseline] + jobs: + - job: CollectDatafiles + steps: + - script: | + if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} . + fi + docker images | grep -i ${IMAGE_NAME} + if [[ $? -ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - task: DownloadPipelineArtifact@2 + inputs: + artifact: + patterns: '*_coverage/.coverage' + path: $(DOWNLOAD_PATH) + + - script: | + echo "--- create container ---" + docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash + echo "--- docker ps ---" + docker ps + echo "--- collect logs ---" + docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \ + && bash install_nc.sh 3x_pt_fp8 \ + && bash ut/3x/collect_log_3x.sh 3x_pt_fp8" + displayName: "Collect UT Coverage" + + - task: PublishPipelineArtifact@1 + condition: succeededOrFailed() + inputs: + targetPath: $(UPLOAD_PATH) + artifact: $(ARTIFACT_NAME) + publishLocation: "pipeline" + + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true" + displayName: "Docker clean up" diff --git a/neural_compressor/torch/utils/environ.py b/neural_compressor/torch/utils/environ.py index 0697979996d..89780e33365 100644 --- a/neural_compressor/torch/utils/environ.py +++ b/neural_compressor/torch/utils/environ.py @@ -57,6 +57,7 @@ def is_hpex_available(): ## check ipex if is_package_available("intel_extension_for_pytorch"): _ipex_available = True + import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _ipex_available = False diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py new file mode 100644 index 00000000000..4864b9aaaaf --- /dev/null +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -0,0 +1,60 @@ +import os +import sys +import torch +import time +from torch.utils.data import DataLoader +from torchvision import transforms, datasets +import torch.nn as nn +import torch.nn.functional as F + +import habana_frameworks.torch.core as htcore + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): + out = x.view(-1,28*28) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + out = F.log_softmax(out, dim=1) + return out + +model = Net() +checkpoint = torch.load('mnist-epoch_20.pth') +model.load_state_dict(checkpoint) + +model = model.eval() + +model = model.to("hpu") + + + +model = torch.compile(model,backend="hpu_backend") + + +transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + +data_path = './data' +test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) +test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) + +correct = 0 +with torch.no_grad(): + for data, label in test_loader: + + data = data.to("hpu") + 
+ label = label.to("hpu") + + output = model(data) + correct += output.argmax(1).eq(label).sum().item() + +accuracy = correct / len(test_loader.dataset) * 100 +print('Inference with torch.compile Completed. Accuracy: {:.2f}%'.format(accuracy)) \ No newline at end of file From f5333b2b3a02485e328db3bbb339a995631148ac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:03:23 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../torch/algorithms/fp8_quant/test_basic.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 4864b9aaaaf..038420371c1 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,31 +1,33 @@ import os import sys -import torch import time -from torch.utils.data import DataLoader -from torchvision import transforms, datasets -import torch.nn as nn -import torch.nn.functional as F import habana_frameworks.torch.core as htcore +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): - out = x.view(-1,28*28) + out = x.view(-1, 28 * 28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out + model = Net() -checkpoint = torch.load('mnist-epoch_20.pth') +checkpoint = torch.load("mnist-epoch_20.pth") model.load_state_dict(checkpoint) model = model.eval() @@ -33,15 +35,12 @@ def forward(self, x): model = model.to("hpu") - -model = torch.compile(model,backend="hpu_backend") +model = torch.compile(model, backend="hpu_backend") -transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) +transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) -data_path = './data' +data_path = "./data" test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) @@ -57,4 +56,4 @@ def forward(self, x): correct += output.argmax(1).eq(label).sum().item() accuracy = correct / len(test_loader.dataset) * 100 -print('Inference with torch.compile Completed. Accuracy: {:.2f}%'.format(accuracy)) \ No newline at end of file +print("Inference with torch.compile Completed. 
Accuracy: {:.2f}%".format(accuracy)) From 0010f5031b4e280e48af4e7830be06a0b3effa88 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 21:03:17 +0800 Subject: [PATCH 3/9] bug fix Signed-off-by: chensuyue --- neural_compressor/torch/utils/environ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/torch/utils/environ.py b/neural_compressor/torch/utils/environ.py index 89780e33365..6eb2f849a93 100644 --- a/neural_compressor/torch/utils/environ.py +++ b/neural_compressor/torch/utils/environ.py @@ -46,6 +46,7 @@ def is_package_available(package_name): ## check hpex if is_package_available("habana_frameworks"): _hpex_available = True + import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _hpex_available = False @@ -57,7 +58,6 @@ def is_hpex_available(): ## check ipex if is_package_available("intel_extension_for_pytorch"): _ipex_available = True - import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _ipex_available = False From 27a06e7d0935657b9dc0bae7f47b95ca1176f4b0 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 21:58:26 +0800 Subject: [PATCH 4/9] fix docker link Signed-off-by: chensuyue --- .azure-pipelines/template/docker-template.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index 0bba5ae9e20..b607d590842 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -74,7 +74,7 @@ steps: - ${{ if eq(parameters.imageSource, 'pull') }}: - script: | - docker pull vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + docker pull vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest displayName: "Pull habana docker image" - script: | @@ -95,7 +95,7 @@ steps: else docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \ - -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest fi echo "Show the container list after docker run ... 
" docker ps -a From 7e83c0cde29d50f2cd72c559540a0a647a6098b3 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 23:42:14 +0800 Subject: [PATCH 5/9] fix bug Signed-off-by: chensuyue --- .azure-pipelines/scripts/install_nc.sh | 2 +- .azure-pipelines/scripts/ut/3x/run_3x_pt.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index 05d4e67055d..755d51f69c6 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -5,7 +5,7 @@ cd /neural-compressor if [[ $1 = *"3x_pt"* ]]; then python -m pip install --no-cache-dir -r requirements_pt.txt python setup.py pt bdist_wheel - pip install dist/neural_compressor*.whl --force-reinstall + pip install --no-deps dist/neural_compressor*.whl --force-reinstall elif [[ $1 = *"3x_tf"* ]]; then python -m pip install --no-cache-dir -r requirements_tf.txt python setup.py tf bdist_wheel diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh index b91bc182c7c..5c5637765fa 100644 --- a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh @@ -15,6 +15,7 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__ cd /neural-compressor/test/3x || exit 1 rm -rf tensorflow rm -rf onnxrt +rm -rf torch/algorithms/fp8_quant LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} From 012081a1eab6c348fff43a5901655f9b95090bc8 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 09:46:06 +0800 Subject: [PATCH 6/9] fix model path Signed-off-by: chensuyue --- test/3x/torch/algorithms/fp8_quant/test_basic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 038420371c1..1f5b102481c 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -27,7 +27,10 @@ def forward(self, x): model = Net() -checkpoint = torch.load("mnist-epoch_20.pth") +model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" +model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" +os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) +checkpoint = torch.load(model_path) model.load_state_dict(checkpoint) model = model.eval() From 2697120154592bfbdd10304d32dc12244471c1bc Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 13:20:55 +0800 Subject: [PATCH 7/9] update test scripts Signed-off-by: chensuyue --- .azure-pipelines/template/docker-template.yml | 4 +- .../torch/algorithms/fp8_quant/test_basic.py | 47 +++++++++---------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index b607d590842..9e98d31e6b9 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -27,7 +27,7 @@ steps: script: | docker ps -a if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then - docker start $(docker ps -aq) + docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}") echo "remove left files through container ..." 
docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true" fi @@ -78,7 +78,7 @@ steps: displayName: "Pull habana docker image" - script: | - docker stop $(docker ps -aq) + docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}") docker rm -vf ${{ parameters.containerName }} || true env | sort displayName: "Clean docker container" diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 1f5b102481c..ae59de65da1 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,31 +1,29 @@ import os import sys +import torch import time import habana_frameworks.torch.core as htcore -import torch + +from torch.utils.data import DataLoader +from torchvision import transforms, datasets import torch.nn as nn import torch.nn.functional as F -from torch.utils.data import DataLoader -from torchvision import datasets, transforms - class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) - + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) def forward(self, x): - out = x.view(-1, 28 * 28) + out = x.view(-1,28*28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out - model = Net() model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" @@ -38,25 +36,24 @@ def forward(self, x): model = model.to("hpu") -model = torch.compile(model, backend="hpu_backend") - +transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) -transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - -data_path = "./data" -test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) -test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) +data_path = './data' +test_kwargs = {'batch_size': 32} +dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) +test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) correct = 0 -with torch.no_grad(): - for data, label in test_loader: +for batch_idx, (data, label) in enumerate(test_loader): + + data = data.to("hpu") - data = data.to("hpu") + output = model(data) - label = label.to("hpu") + htcore.mark_step() - output = model(data) - correct += output.argmax(1).eq(label).sum().item() + correct += output.max(1)[1].eq(label).sum() -accuracy = correct / len(test_loader.dataset) * 100 -print("Inference with torch.compile Completed. Accuracy: {:.2f}%".format(accuracy)) +print('Accuracy: {:.2f}%'.format(100. 
* correct / (len(test_loader) * 32))) \ No newline at end of file From 193db0b1d1f03add94f7581310883ecaf64fad8f Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 14:24:50 +0800 Subject: [PATCH 8/9] update ut test Signed-off-by: chensuyue --- .../torch/algorithms/fp8_quant/test_basic.py | 63 +++++++++---------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index ae59de65da1..49395b38456 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -24,36 +24,33 @@ def forward(self, x): out = F.log_softmax(out, dim=1) return out -model = Net() -model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" -model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" -os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) -checkpoint = torch.load(model_path) -model.load_state_dict(checkpoint) - -model = model.eval() - -model = model.to("hpu") - - -transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - -data_path = './data' -test_kwargs = {'batch_size': 32} -dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) -test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) - -correct = 0 -for batch_idx, (data, label) in enumerate(test_loader): - - data = data.to("hpu") - - output = model(data) - - htcore.mark_step() - - correct += output.max(1)[1].eq(label).sum() - -print('Accuracy: {:.2f}%'.format(100. * correct / (len(test_loader) * 32))) \ No newline at end of file +def test_hpu(): + model = Net() + model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" + model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) + checkpoint = torch.load(model_path) + model.load_state_dict(checkpoint) + + model = model.eval() + + model = model.to("hpu") + + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + + data_path = './data' + test_kwargs = {'batch_size': 32} + dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) + test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) + + correct = 0 + for batch_idx, (data, label) in enumerate(test_loader): + data = data.to("hpu") + output = model(data) + htcore.mark_step() + correct += output.max(1)[1].eq(label).sum() + + accuracy = 100. 
* correct / (len(test_loader) * 32) + assert accuracy > 90 \ No newline at end of file From 651a1267a438fb3cd3358a436586c464bdfa4024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 06:28:39 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../torch/algorithms/fp8_quant/test_basic.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 49395b38456..98ca06222a5 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,29 +1,31 @@ import os import sys -import torch import time import habana_frameworks.torch.core as htcore - -from torch.utils.data import DataLoader -from torchvision import transforms, datasets +import torch import torch.nn as nn import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms + class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): - out = x.view(-1,28*28) + out = x.view(-1, 28 * 28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out + def test_hpu(): model = Net() model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" @@ -36,14 +38,12 @@ def test_hpu(): model = model.to("hpu") - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) + transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - data_path = './data' - test_kwargs = {'batch_size': 32} + data_path = "./data" + test_kwargs = {"batch_size": 32} dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) - test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) + test_loader = torch.utils.data.DataLoader(dataset1, **test_kwargs) correct = 0 for batch_idx, (data, label) in enumerate(test_loader): @@ -52,5 +52,5 @@ def test_hpu(): htcore.mark_step() correct += output.max(1)[1].eq(label).sum() - accuracy = 100. * correct / (len(test_loader) * 32) - assert accuracy > 90 \ No newline at end of file + accuracy = 100.0 * correct / (len(test_loader) * 32) + assert accuracy > 90
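
The series above wires the new HPU-only unit test into a dedicated Gaudi pipeline (ut-3x-pt-fp8.yml, run inside the pulled vault.habana.ai container) and removes it from the generic 3x PyTorch run via rm -rf torch/algorithms/fp8_quant, so test_basic.py is only ever collected where the Habana stack exists. As a minimal sketch layered on top of these patches, such a test could also guard itself when collected on a machine without the Habana bridge; the pytest.importorskip guard, the test_hpu_smoke name, and the tiny tensor check below are illustrative assumptions and not part of the patch series, while habana_frameworks.torch.core, htcore.mark_step(), and .to("hpu") mirror calls already used in test_basic.py.

import pytest
import torch

# Skip the whole module when the Habana PyTorch bridge is absent, instead of
# failing at import time the way an unconditional import would. This guard is
# an assumption for illustration, not taken from the patches above.
htcore = pytest.importorskip("habana_frameworks.torch.core")


def test_hpu_smoke():
    # Move a tensor to the Gaudi device and run a trivial op, mirroring the
    # data.to("hpu") calls in test_basic.py.
    x = torch.ones(2, 2).to("hpu")
    z = x + x
    htcore.mark_step()  # flush the lazy graph, as the patched test does
    assert torch.equal(z.to("cpu"), torch.full((2, 2), 2.0))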