From 78f0fb6d6ee13119464593745afc9f4f40261da5 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 17:54:59 +0800 Subject: [PATCH 1/9] support habana fp8 test Signed-off-by: chensuyue --- .azure-pipelines/scripts/install_nc.sh | 2 +- .../scripts/ut/3x/collect_log_3x.sh | 3 +- .../scripts/ut/3x/coverage.3x_pt_fp8 | 15 +++ .../scripts/ut/3x/run_3x_pt_fp8.sh | 35 ++++++ .azure-pipelines/template/docker-template.yml | 40 +++++-- .azure-pipelines/template/ut-template.yml | 4 + .azure-pipelines/ut-3x-pt-fp8.yml | 106 ++++++++++++++++++ neural_compressor/torch/utils/environ.py | 1 + .../torch/algorithms/fp8_quant/test_basic.py | 60 ++++++++++ 9 files changed, 252 insertions(+), 14 deletions(-) create mode 100644 .azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 create mode 100644 .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh create mode 100644 .azure-pipelines/ut-3x-pt-fp8.yml create mode 100644 test/3x/torch/algorithms/fp8_quant/test_basic.py diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index d3cee07609c..05d4e67055d 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -2,7 +2,7 @@ echo -e "\n Install Neural Compressor ... " cd /neural-compressor -if [[ $1 = *"3x_pt" ]]; then +if [[ $1 = *"3x_pt"* ]]; then python -m pip install --no-cache-dir -r requirements_pt.txt python setup.py pt bdist_wheel pip install dist/neural_compressor*.whl --force-reinstall diff --git a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh index 386ec397c81..03f4fd02dbf 100644 --- a/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh +++ b/.azure-pipelines/scripts/ut/3x/collect_log_3x.sh @@ -25,7 +25,8 @@ git config --global --add safe.directory /neural-compressor git fetch git checkout master rm -rf build dist *egg-info -echo y | pip uninstall neural_compressor_${1} +binary_index="${1%_fp8}" +echo y | pip uninstall neural_compressor_${binary_index} cd /neural-compressor/.azure-pipelines-pr/scripts && bash install_nc.sh ${1} coverage erase diff --git a/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 new file mode 100644 index 00000000000..f1bf27d8da3 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 @@ -0,0 +1,15 @@ +[run] +branch = True + +[report] +include = + */neural_compressor/torch/algorithms/habana_fp8/* + */neural_compressor/torch/amp/* +exclude_lines = + pragma: no cover + raise NotImplementedError + raise TypeError + if self.device == "gpu": + if device == "gpu": + except ImportError: + except Exception as e: \ No newline at end of file diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh new file mode 100644 index 00000000000..d2aef0c3045 --- /dev/null +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh @@ -0,0 +1,35 @@ +#!/bin/bash +python -c "import neural_compressor as nc" +test_case="run 3x Torch Habana FP8" +echo "${test_case}" + +# install requirements +echo "set up UT env..." 
+sed -i '/^intel_extension_for_pytorch/d' /neural-compressor/test/3x/torch/requirements.txt +pip install -r /neural-compressor/test/3x/torch/requirements.txt +pip install git+https://github.com/HabanaAI/DeepSpeed.git@1.16.0 +pip install pytest-cov +pip install pytest-html +pip list + +export COVERAGE_RCFILE=/neural-compressor/.azure-pipelines/scripts/ut/3x/coverage.3x_pt_fp8 +inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__[0])') +cd /neural-compressor/test/3x || exit 1 + +LOG_DIR=/neural-compressor/log_dir +mkdir -p ${LOG_DIR} +ut_log_name=${LOG_DIR}/ut_3x_pt_fp8.log +pytest --cov="${inc_path}" -vs --disable-warnings --html=report.html --self-contained-html torch/algorithms/fp8_quant 2>&1 | tee -a ${ut_log_name} + +cp report.html ${LOG_DIR}/ + +if [ $(grep -c '== FAILURES ==' ${ut_log_name}) != 0 ] || [ $(grep -c '== ERRORS ==' ${ut_log_name}) != 0 ] || [ $(grep -c ' passed' ${ut_log_name}) == 0 ]; then + echo "Find errors in pytest case, please check the output..." + echo "Please search for '== FAILURES ==' or '== ERRORS =='" + exit 1 +fi + +# if ut pass, collect the coverage file into artifacts +cp .coverage ${LOG_DIR}/.coverage + +echo "UT finished successfully! " \ No newline at end of file diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index e7b563bcea7..0bba5ae9e20 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -16,6 +16,9 @@ parameters: - name: repo type: string default: "https://github.com/intel/neural-compressor" + - name: imageSource + type: string + default: "build" steps: - task: Bash@3 @@ -57,16 +60,22 @@ steps: git checkout master displayName: "Checkout out master" - - script: | - if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then - docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . - fi - docker images | grep -i ${{ parameters.repoName }} - if [[ $? -ne 0 ]]; then - echo "NO Such Repo" - exit 1 - fi - displayName: "Build develop docker image" + - ${{ if eq(parameters.imageSource, 'build') }}: + - script: | + if [[ ! $(docker images | grep -i ${{ parameters.repoName }}:${{ parameters.repoTag }}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/${{parameters.dockerFileName}}.devel -t ${{ parameters.repoName }}:${{ parameters.repoTag }} . + fi + docker images | grep -i ${{ parameters.repoName }} + if [[ $? 
-ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - ${{ if eq(parameters.imageSource, 'pull') }}: + - script: | + docker pull vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + displayName: "Pull habana docker image" - script: | docker stop $(docker ps -aq) @@ -79,8 +88,15 @@ steps: inputs: targetType: "inline" script: | - docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ - -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 ${{ parameters.repoName }}:${{ parameters.repoTag }} + if [[ "${{ parameters.imageSource }}" == "build" ]]; then + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \ + ${{ parameters.repoName }}:${{ parameters.repoTag }} + else + docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ + --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \ + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + fi echo "Show the container list after docker run ... " docker ps -a displayName: "Docker run - ${{ parameters.containerName }} Container" diff --git a/.azure-pipelines/template/ut-template.yml b/.azure-pipelines/template/ut-template.yml index b7fecacd3d7..d8908d22a35 100644 --- a/.azure-pipelines/template/ut-template.yml +++ b/.azure-pipelines/template/ut-template.yml @@ -17,6 +17,9 @@ parameters: - name: utContainerName type: string default: "utTest" + - name: imageSource + type: string + default: "build" steps: - template: docker-template.yml @@ -27,6 +30,7 @@ steps: dockerFileName: "Dockerfile" containerName: ${{ parameters.utContainerName }} repo: ${{ parameters.repo }} + imageSource: ${{ parameters.imageSource }} - script: | docker exec ${{ parameters.utContainerName }} bash -c "cd /neural-compressor/.azure-pipelines/scripts \ diff --git a/.azure-pipelines/ut-3x-pt-fp8.yml b/.azure-pipelines/ut-3x-pt-fp8.yml new file mode 100644 index 00000000000..4fa440fea8d --- /dev/null +++ b/.azure-pipelines/ut-3x-pt-fp8.yml @@ -0,0 +1,106 @@ +trigger: none + +pr: + autoCancel: true + drafts: false + branches: + include: + - master + paths: + include: + - neural_compressor/common + - setup.py + - requirements_pt.txt + - .azure-pipelines/scripts/ut/3x/run_3x_pt_fp8.sh + +pool: GAUDI + +variables: + IMAGE_NAME: "neural-compressor" + IMAGE_TAG: "py310" + UPLOAD_PATH: $(Build.SourcesDirectory)/log_dir + DOWNLOAD_PATH: $(Build.SourcesDirectory)/log_dir + ARTIFACT_NAME: "UT_coverage_report_3x_pt_fp8" + REPO: $(Build.Repository.Uri) + +stages: + - stage: Torch_habana + displayName: Torch 3x Habana FP8 + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" + dockerConfigName: "commonDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Torch_habana_baseline + displayName: Torch 3x Habana FP8 baseline + dependsOn: [] + jobs: + - job: + displayName: Torch 3x Habana FP8 baseline + steps: + - template: template/ut-template.yml + parameters: + imageSource: "pull" 
+ dockerConfigName: "gitCloneDockerConfig" + utScriptFileName: "3x/run_3x_pt_fp8" + uploadPath: $(UPLOAD_PATH) + utArtifact: "ut_3x_pt_fp8" + + - stage: Coverage + displayName: "Coverage Compare" + pool: + vmImage: "ubuntu-latest" + dependsOn: [Torch_habana, Torch_habana_baseline] + jobs: + - job: CollectDatafiles + steps: + - script: | + if [[ ! $(docker images | grep -i ${IMAGE_NAME}:${IMAGE_TAG}) ]]; then + docker build -f ${BUILD_SOURCESDIRECTORY}/.azure-pipelines/docker/Dockerfile.devel -t ${IMAGE_NAME}:${IMAGE_TAG} . + fi + docker images | grep -i ${IMAGE_NAME} + if [[ $? -ne 0 ]]; then + echo "NO Such Repo" + exit 1 + fi + displayName: "Build develop docker image" + + - task: DownloadPipelineArtifact@2 + inputs: + artifact: + patterns: '*_coverage/.coverage' + path: $(DOWNLOAD_PATH) + + - script: | + echo "--- create container ---" + docker run -d -it --name="collectLogs" -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor ${IMAGE_NAME}:${IMAGE_TAG} /bin/bash + echo "--- docker ps ---" + docker ps + echo "--- collect logs ---" + docker exec collectLogs /bin/bash +x -c "cd /neural-compressor/.azure-pipelines/scripts \ + && bash install_nc.sh 3x_pt_fp8 \ + && bash ut/3x/collect_log_3x.sh 3x_pt_fp8" + displayName: "Collect UT Coverage" + + - task: PublishPipelineArtifact@1 + condition: succeededOrFailed() + inputs: + targetPath: $(UPLOAD_PATH) + artifact: $(ARTIFACT_NAME) + publishLocation: "pipeline" + + - task: Bash@3 + condition: always() + inputs: + targetType: "inline" + script: | + docker exec collectLogs bash -c "rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* || true" + displayName: "Docker clean up" diff --git a/neural_compressor/torch/utils/environ.py b/neural_compressor/torch/utils/environ.py index 0697979996d..89780e33365 100644 --- a/neural_compressor/torch/utils/environ.py +++ b/neural_compressor/torch/utils/environ.py @@ -57,6 +57,7 @@ def is_hpex_available(): ## check ipex if is_package_available("intel_extension_for_pytorch"): _ipex_available = True + import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _ipex_available = False diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py new file mode 100644 index 00000000000..4864b9aaaaf --- /dev/null +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -0,0 +1,60 @@ +import os +import sys +import torch +import time +from torch.utils.data import DataLoader +from torchvision import transforms, datasets +import torch.nn as nn +import torch.nn.functional as F + +import habana_frameworks.torch.core as htcore + + +class Net(nn.Module): + def __init__(self): + super(Net, self).__init__() + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): + out = x.view(-1,28*28) + out = F.relu(self.fc1(out)) + out = F.relu(self.fc2(out)) + out = self.fc3(out) + out = F.log_softmax(out, dim=1) + return out + +model = Net() +checkpoint = torch.load('mnist-epoch_20.pth') +model.load_state_dict(checkpoint) + +model = model.eval() + +model = model.to("hpu") + + + +model = torch.compile(model,backend="hpu_backend") + + +transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + +data_path = './data' +test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) +test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) + +correct = 0 +with torch.no_grad(): + for data, label in test_loader: + + data = data.to("hpu") + 
+ label = label.to("hpu") + + output = model(data) + correct += output.argmax(1).eq(label).sum().item() + +accuracy = correct / len(test_loader.dataset) * 100 +print('Inference with torch.compile Completed. Accuracy: {:.2f}%'.format(accuracy)) \ No newline at end of file From f5333b2b3a02485e328db3bbb339a995631148ac Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 9 Jul 2024 10:03:23 +0000 Subject: [PATCH 2/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../torch/algorithms/fp8_quant/test_basic.py | 33 +++++++++---------- 1 file changed, 16 insertions(+), 17 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 4864b9aaaaf..038420371c1 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,31 +1,33 @@ import os import sys -import torch import time -from torch.utils.data import DataLoader -from torchvision import transforms, datasets -import torch.nn as nn -import torch.nn.functional as F import habana_frameworks.torch.core as htcore +import torch +import torch.nn as nn +import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): - out = x.view(-1,28*28) + out = x.view(-1, 28 * 28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out + model = Net() -checkpoint = torch.load('mnist-epoch_20.pth') +checkpoint = torch.load("mnist-epoch_20.pth") model.load_state_dict(checkpoint) model = model.eval() @@ -33,15 +35,12 @@ def forward(self, x): model = model.to("hpu") - -model = torch.compile(model,backend="hpu_backend") +model = torch.compile(model, backend="hpu_backend") -transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) +transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) -data_path = './data' +data_path = "./data" test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) @@ -57,4 +56,4 @@ def forward(self, x): correct += output.argmax(1).eq(label).sum().item() accuracy = correct / len(test_loader.dataset) * 100 -print('Inference with torch.compile Completed. Accuracy: {:.2f}%'.format(accuracy)) \ No newline at end of file +print("Inference with torch.compile Completed. 
Accuracy: {:.2f}%".format(accuracy)) From 0010f5031b4e280e48af4e7830be06a0b3effa88 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 21:03:17 +0800 Subject: [PATCH 3/9] bug fix Signed-off-by: chensuyue --- neural_compressor/torch/utils/environ.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/neural_compressor/torch/utils/environ.py b/neural_compressor/torch/utils/environ.py index 89780e33365..6eb2f849a93 100644 --- a/neural_compressor/torch/utils/environ.py +++ b/neural_compressor/torch/utils/environ.py @@ -46,6 +46,7 @@ def is_package_available(package_name): ## check hpex if is_package_available("habana_frameworks"): _hpex_available = True + import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _hpex_available = False @@ -57,7 +58,6 @@ def is_hpex_available(): ## check ipex if is_package_available("intel_extension_for_pytorch"): _ipex_available = True - import habana_frameworks.torch.hpex # pylint: disable=E0401 else: _ipex_available = False From 27a06e7d0935657b9dc0bae7f47b95ca1176f4b0 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 21:58:26 +0800 Subject: [PATCH 4/9] fix docker link Signed-off-by: chensuyue --- .azure-pipelines/template/docker-template.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index 0bba5ae9e20..b607d590842 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -74,7 +74,7 @@ steps: - ${{ if eq(parameters.imageSource, 'pull') }}: - script: | - docker pull vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + docker pull vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest displayName: "Pull habana docker image" - script: | @@ -95,7 +95,7 @@ steps: else docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \ --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \ - -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/{$OS}/habanalabs/pytorch-installer-2.2.2:latest + -v ${BUILD_SOURCESDIRECTORY}:/neural-compressor vault.habana.ai/gaudi-docker/1.16.1/ubuntu22.04/habanalabs/pytorch-installer-2.2.2:latest fi echo "Show the container list after docker run ... 
" docker ps -a From 7e83c0cde29d50f2cd72c559540a0a647a6098b3 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Tue, 9 Jul 2024 23:42:14 +0800 Subject: [PATCH 5/9] fix bug Signed-off-by: chensuyue --- .azure-pipelines/scripts/install_nc.sh | 2 +- .azure-pipelines/scripts/ut/3x/run_3x_pt.sh | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.azure-pipelines/scripts/install_nc.sh b/.azure-pipelines/scripts/install_nc.sh index 05d4e67055d..755d51f69c6 100644 --- a/.azure-pipelines/scripts/install_nc.sh +++ b/.azure-pipelines/scripts/install_nc.sh @@ -5,7 +5,7 @@ cd /neural-compressor if [[ $1 = *"3x_pt"* ]]; then python -m pip install --no-cache-dir -r requirements_pt.txt python setup.py pt bdist_wheel - pip install dist/neural_compressor*.whl --force-reinstall + pip install --no-deps dist/neural_compressor*.whl --force-reinstall elif [[ $1 = *"3x_tf"* ]]; then python -m pip install --no-cache-dir -r requirements_tf.txt python setup.py tf bdist_wheel diff --git a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh index b91bc182c7c..5c5637765fa 100644 --- a/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh +++ b/.azure-pipelines/scripts/ut/3x/run_3x_pt.sh @@ -15,6 +15,7 @@ inc_path=$(python -c 'import neural_compressor; print(neural_compressor.__path__ cd /neural-compressor/test/3x || exit 1 rm -rf tensorflow rm -rf onnxrt +rm -rf torch/algorithms/fp8_quant LOG_DIR=/neural-compressor/log_dir mkdir -p ${LOG_DIR} From 012081a1eab6c348fff43a5901655f9b95090bc8 Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 09:46:06 +0800 Subject: [PATCH 6/9] fix model path Signed-off-by: chensuyue --- test/3x/torch/algorithms/fp8_quant/test_basic.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 038420371c1..1f5b102481c 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -27,7 +27,10 @@ def forward(self, x): model = Net() -checkpoint = torch.load("mnist-epoch_20.pth") +model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" +model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" +os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) +checkpoint = torch.load(model_path) model.load_state_dict(checkpoint) model = model.eval() From 2697120154592bfbdd10304d32dc12244471c1bc Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 13:20:55 +0800 Subject: [PATCH 7/9] update test scripts Signed-off-by: chensuyue --- .azure-pipelines/template/docker-template.yml | 4 +- .../torch/algorithms/fp8_quant/test_basic.py | 47 +++++++++---------- 2 files changed, 24 insertions(+), 27 deletions(-) diff --git a/.azure-pipelines/template/docker-template.yml b/.azure-pipelines/template/docker-template.yml index b607d590842..9e98d31e6b9 100644 --- a/.azure-pipelines/template/docker-template.yml +++ b/.azure-pipelines/template/docker-template.yml @@ -27,7 +27,7 @@ steps: script: | docker ps -a if [[ $(docker ps -a | grep -i '${{ parameters.containerName }}'$) ]]; then - docker start $(docker ps -aq) + docker start $(docker ps -aq --filter "name=${{ parameters.containerName }}") echo "remove left files through container ..." 
docker exec ${{ parameters.containerName }} bash -c "ls -a /neural-compressor && rm -fr /neural-compressor/* && rm -fr /neural-compressor/.* && ls -a /neural-compressor || true" fi @@ -78,7 +78,7 @@ steps: displayName: "Pull habana docker image" - script: | - docker stop $(docker ps -aq) + docker stop $(docker ps -aq --filter "name=${{ parameters.containerName }}") docker rm -vf ${{ parameters.containerName }} || true env | sort displayName: "Clean docker container" diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 1f5b102481c..ae59de65da1 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,31 +1,29 @@ import os import sys +import torch import time import habana_frameworks.torch.core as htcore -import torch + +from torch.utils.data import DataLoader +from torchvision import transforms, datasets import torch.nn as nn import torch.nn.functional as F -from torch.utils.data import DataLoader -from torchvision import datasets, transforms - class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) - + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) def forward(self, x): - out = x.view(-1, 28 * 28) + out = x.view(-1,28*28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out - model = Net() model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" @@ -38,25 +36,24 @@ def forward(self, x): model = model.to("hpu") -model = torch.compile(model, backend="hpu_backend") - +transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) -transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - -data_path = "./data" -test_dataset = datasets.MNIST(data_path, train=False, download=True, transform=transform) -test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=32) +data_path = './data' +test_kwargs = {'batch_size': 32} +dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) +test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) correct = 0 -with torch.no_grad(): - for data, label in test_loader: +for batch_idx, (data, label) in enumerate(test_loader): + + data = data.to("hpu") - data = data.to("hpu") + output = model(data) - label = label.to("hpu") + htcore.mark_step() - output = model(data) - correct += output.argmax(1).eq(label).sum().item() + correct += output.max(1)[1].eq(label).sum() -accuracy = correct / len(test_loader.dataset) * 100 -print("Inference with torch.compile Completed. Accuracy: {:.2f}%".format(accuracy)) +print('Accuracy: {:.2f}%'.format(100. 
* correct / (len(test_loader) * 32))) \ No newline at end of file From 193db0b1d1f03add94f7581310883ecaf64fad8f Mon Sep 17 00:00:00 2001 From: chensuyue Date: Wed, 10 Jul 2024 14:24:50 +0800 Subject: [PATCH 8/9] update ut test Signed-off-by: chensuyue --- .../torch/algorithms/fp8_quant/test_basic.py | 63 +++++++++---------- 1 file changed, 30 insertions(+), 33 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index ae59de65da1..49395b38456 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -24,36 +24,33 @@ def forward(self, x): out = F.log_softmax(out, dim=1) return out -model = Net() -model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" -model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" -os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) -checkpoint = torch.load(model_path) -model.load_state_dict(checkpoint) - -model = model.eval() - -model = model.to("hpu") - - -transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) - -data_path = './data' -test_kwargs = {'batch_size': 32} -dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) -test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) - -correct = 0 -for batch_idx, (data, label) in enumerate(test_loader): - - data = data.to("hpu") - - output = model(data) - - htcore.mark_step() - - correct += output.max(1)[1].eq(label).sum() - -print('Accuracy: {:.2f}%'.format(100. * correct / (len(test_loader) * 32))) \ No newline at end of file +def test_hpu(): + model = Net() + model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" + model_path = "/tmp/.neural_compressor/mnist-epoch_20.pth" + os.system("mkdir -p /tmp/.neural_compressor && wget {} -O {} ".format(model_link, model_path)) + checkpoint = torch.load(model_path) + model.load_state_dict(checkpoint) + + model = model.eval() + + model = model.to("hpu") + + transform=transforms.Compose([ + transforms.ToTensor(), + transforms.Normalize((0.1307,), (0.3081,))]) + + data_path = './data' + test_kwargs = {'batch_size': 32} + dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) + test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) + + correct = 0 + for batch_idx, (data, label) in enumerate(test_loader): + data = data.to("hpu") + output = model(data) + htcore.mark_step() + correct += output.max(1)[1].eq(label).sum() + + accuracy = 100. 
* correct / (len(test_loader) * 32) + assert accuracy > 90 \ No newline at end of file From 651a1267a438fb3cd3358a436586c464bdfa4024 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Wed, 10 Jul 2024 06:28:39 +0000 Subject: [PATCH 9/9] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- .../torch/algorithms/fp8_quant/test_basic.py | 32 +++++++++---------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/test/3x/torch/algorithms/fp8_quant/test_basic.py b/test/3x/torch/algorithms/fp8_quant/test_basic.py index 49395b38456..98ca06222a5 100644 --- a/test/3x/torch/algorithms/fp8_quant/test_basic.py +++ b/test/3x/torch/algorithms/fp8_quant/test_basic.py @@ -1,29 +1,31 @@ import os import sys -import torch import time import habana_frameworks.torch.core as htcore - -from torch.utils.data import DataLoader -from torchvision import transforms, datasets +import torch import torch.nn as nn import torch.nn.functional as F +from torch.utils.data import DataLoader +from torchvision import datasets, transforms + class Net(nn.Module): def __init__(self): super(Net, self).__init__() - self.fc1 = nn.Linear(784, 256) - self.fc2 = nn.Linear(256, 64) - self.fc3 = nn.Linear(64, 10) + self.fc1 = nn.Linear(784, 256) + self.fc2 = nn.Linear(256, 64) + self.fc3 = nn.Linear(64, 10) + def forward(self, x): - out = x.view(-1,28*28) + out = x.view(-1, 28 * 28) out = F.relu(self.fc1(out)) out = F.relu(self.fc2(out)) out = self.fc3(out) out = F.log_softmax(out, dim=1) return out + def test_hpu(): model = Net() model_link = "https://vault.habana.ai/artifactory/misc/inference/mnist/mnist-epoch_20.pth" @@ -36,14 +38,12 @@ def test_hpu(): model = model.to("hpu") - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,))]) + transform = transforms.Compose([transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]) - data_path = './data' - test_kwargs = {'batch_size': 32} + data_path = "./data" + test_kwargs = {"batch_size": 32} dataset1 = datasets.MNIST(data_path, train=False, download=True, transform=transform) - test_loader = torch.utils.data.DataLoader(dataset1,**test_kwargs) + test_loader = torch.utils.data.DataLoader(dataset1, **test_kwargs) correct = 0 for batch_idx, (data, label) in enumerate(test_loader): @@ -52,5 +52,5 @@ def test_hpu(): htcore.mark_step() correct += output.max(1)[1].eq(label).sum() - accuracy = 100. * correct / (len(test_loader) * 32) - assert accuracy > 90 \ No newline at end of file + accuracy = 100.0 * correct / (len(test_loader) * 32) + assert accuracy > 90
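
The series above wires the new HPU-only unit test into a dedicated Gaudi pipeline (ut-3x-pt-fp8.yml, run inside the pulled vault.habana.ai container) and removes it from the generic 3x PyTorch run via rm -rf torch/algorithms/fp8_quant, so test_basic.py is only ever collected where the Habana stack exists. As a minimal sketch layered on top of these patches, such a test could also guard itself when collected on a machine without the Habana bridge; the pytest.importorskip guard, the test_hpu_smoke name, and the tiny tensor check below are illustrative assumptions and not part of the patch series, while habana_frameworks.torch.core, htcore.mark_step(), and .to("hpu") mirror calls already used in test_basic.py.

import pytest
import torch

# Skip the whole module when the Habana PyTorch bridge is absent, instead of
# failing at import time the way an unconditional import would. This guard is
# an assumption for illustration, not taken from the patches above.
htcore = pytest.importorskip("habana_frameworks.torch.core")


def test_hpu_smoke():
    # Move a tensor to the Gaudi device and run a trivial op, mirroring the
    # data.to("hpu") calls in test_basic.py.
    x = torch.ones(2, 2).to("hpu")
    z = x + x
    htcore.mark_step()  # flush the lazy graph, as the patched test does
    assert torch.equal(z.to("cpu"), torch.full((2, 2), 2.0))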