Fix auto-round install & bump to 0.4.4 #387

Merged · 17 commits · Jan 2, 2025
.azure-pipelines/scripts/codeScan/pylint/pylint.sh (2 additions, 0 deletions)

@@ -15,7 +15,9 @@ RESET="echo -en \\E[0m \\n" # close color
 log_dir="/auto-round/.azure-pipelines/scripts/codeScan/scanLog"
 mkdir -p $log_dir

+pip install torch --index-url https://download.pytorch.org/whl/cpu
 pip install -r /auto-round/requirements.txt
+pip install -r /auto-round/requirements-cpu.txt

 echo "[DEBUG] list pipdeptree..."
 pip install pipdeptree
.azure-pipelines/template/docker-template.yml (6 additions, 5 deletions)

@@ -74,7 +74,7 @@ steps:

 - ${{ if eq(parameters.imageSource, 'pull') }}:
   - script: |
-      docker pull vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
+      docker pull vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
     displayName: "Pull habana docker image"

 - script: |

@@ -90,12 +90,13 @@
     script: |
       if [[ "${{ parameters.imageSource }}" == "build" ]]; then
         docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
           -v ${BUILD_SOURCESDIRECTORY}:/auto-round -v /tf_dataset:/tf_dataset -v /tf_dataset2:/tf_dataset2 \
           ${{ parameters.repoName }}:${{ parameters.repoTag }}
       else
         docker run -dit --disable-content-trust --privileged --name=${{ parameters.containerName }} --shm-size="2g" \
           --runtime=habana -e HABANA_VISIBLE_DEVICES=all -e OMPI_MCA_btl_vader_single_copy_mechanism=none --cap-add=sys_nice --net=host --ipc=host \
-          -v ${BUILD_SOURCESDIRECTORY}:/auto-round vault.habana.ai/gaudi-docker/1.18.0/ubuntu22.04/habanalabs/pytorch-installer-2.4.0:latest
+          -v ${BUILD_SOURCESDIRECTORY}:/auto-round vault.habana.ai/gaudi-docker/1.19.0/ubuntu22.04/habanalabs/pytorch-installer-2.5.1:latest
+        docker exec ${{ parameters.containerName }} bash -c "ln -sf \$(which python3) /usr/bin/python"
       fi
       echo "Show the container list after docker run ... "
       docker ps -a
.azure-pipelines/template/ut-template.yml (4 additions, 2 deletions)

@@ -35,14 +35,16 @@ steps:

 - ${{ if eq(parameters.imageSource, 'build') }}:
   - script: |
       docker exec ${{ parameters.utContainerName }} bash -c "cd /auto-round \
-        && pip install -vvv --no-build-isolation . \
+        && pip install torch --index-url https://download.pytorch.org/whl/cpu \
+        && pip install .[cpu] \
         && pip list"
     displayName: "Env Setup"

 - ${{ if eq(parameters.imageSource, 'pull') }}:
   - script: |
       docker exec ${{ parameters.utContainerName }} bash -c "cd /auto-round \
-        && pip install -vvv --no-build-isolation . \
+        && python setup.py bdist_wheel lib \
+        && pip install dist/*.whl \
         && pip list"
     displayName: "HPU Env Setup"
MANIFEST.in (1 addition, 0 deletions)

@@ -1,3 +1,4 @@
 include requirements.txt
 include requirements-cpu.txt
 include requirements-hpu.txt
+include requirements-lib.txt
README.md (17 additions, 3 deletions)

@@ -5,7 +5,7 @@ AutoRound
 <h3> Advanced Quantization Algorithm for LLMs</h3>

 [![python](https://img.shields.io/badge/python-3.9%2B-blue)](https://github.com/intel/auto-round)
-[![version](https://img.shields.io/badge/release-0.4.3-green)](https://github.com/intel/auto-round)
+[![version](https://img.shields.io/badge/release-0.4.4-green)](https://github.com/intel/auto-round)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](https://github.com/intel/auto-round/blob/main/LICENSE)
 ---
 <div align="left">

@@ -57,14 +57,28 @@ details and quantized models in several Hugging Face Spaces, e.g. [OPEA](https:/
 ### Install from pypi

 ```bash
-pip install auto-round
+# GPU
+pip install auto-round[gpu]
+
+# CPU
+pip install auto-round[cpu]
+
+# HPU
+pip install auto-round-lib
 ```

 <details>
 <summary>Build from Source</summary>

 ```bash
-pip install -vvv --no-build-isolation .
+# GPU
+pip install .[gpu]
+
+# CPU
+pip install .[cpu]
+
+# HPU
+python setup.py install lib
 ```

 </details>
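
A quick way to confirm which build landed after any of the installs above — a minimal sketch, assuming `auto_round` re-exports `__version__` from `auto_round/version.py` (not shown in this diff):

```python
# Post-install sanity check (sketch): print the installed auto-round version.
# Assumes the top-level package re-exports __version__ from auto_round/version.py.
import auto_round

print(auto_round.__version__)  # expected after this PR: "0.4.4"
```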
auto_round/auto_quantizer.py (1 addition, 1 deletion)

@@ -546,7 +546,7 @@ def remove_device_str(s, device_str):
     )
     if "gptq" in layer_backend and "exllamav2" in layer_backend:
         try:
-            from exllamav2_kernels import gemm_half_q_half, make_q_matrix  # pylint: disable=E0611
+            from exllamav2_kernels import gemm_half_q_half, make_q_matrix  # pylint: disable=E0611, E0401
         except:
             logger.warning_once(
                 "For better inference performance, please install exllamav2 kernel "
auto_round/version.py (1 addition, 1 deletion)

@@ -14,4 +14,4 @@
 """Intel® auto-round: An open-source Python library
 supporting popular model weight only compression based on signround."""

-__version__ = "0.4.3"
+__version__ = "0.4.4"
requirements-cpu.txt (0 additions, 14 deletions)

@@ -1,16 +1,2 @@
-accelerate
-datasets
-py-cpuinfo
-sentencepiece
-torch
-transformers>=4.38
-numpy < 2.0
-threadpoolctl
-lm-eval>=0.4.2,<0.5
-tqdm
-packaging
-pillow
-numba
-tbb
 intel-extension-for-pytorch
 intel-extension-for-transformers
requirements-gpu.txt (new file; 1 addition)

@@ -0,0 +1 @@
+auto-gptq>=0.7.1
Review thread:

Contributor:
the above 4 should be in requirements

Contributor Author:
> the above 4 should be in requirements

All platforms will install requirements.txt, so we can't add incompatible packages there.

Contributor:
The basic requirements must support tuning on GPU; otherwise, the user experience will suffer. We may release a whl for HPU.

requirements-hpu.txt → requirements-lib.txt (renamed; 2 additions, 2 deletions)

@@ -2,10 +2,10 @@ accelerate
 datasets
 py-cpuinfo
 sentencepiece
-transformers
 numpy < 2.0
 tqdm
 packaging
 pillow
 numba
-tbb
+tbb
+transformers
requirements.txt (4 additions, 5 deletions)

@@ -2,14 +2,13 @@ accelerate
 datasets
 py-cpuinfo
 sentencepiece
-torch
-transformers>=4.38
 numpy < 2.0
-threadpoolctl
-lm-eval>=0.4.2,<0.5
 tqdm
 packaging
-auto-gptq>=0.7.1
 pillow
 numba
 tbb
+torch
+transformers>=4.38
+threadpoolctl
+lm-eval>=0.4.2,<0.5
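
The setup.py diff below calls a `fetch_requirements` helper that is not part of this PR's changes. For orientation only, a plausible minimal implementation (hypothetical; the real helper may differ):

```python
def fetch_requirements(path: str) -> list[str]:
    # Read one requirement per line, skipping blank lines and comments.
    with open(path, encoding="utf-8") as f:
        return [
            line.strip()
            for line in f
            if line.strip() and not line.lstrip().startswith("#")
        ]
```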
setup.py (45 additions, 18 deletions)

@@ -25,7 +25,6 @@
 def is_cuda_available():
     try:
-        os.system("pip install torch")
         import torch

         return torch.cuda.is_available()

@@ -112,15 +111,6 @@ def detect_local_sm_architectures():
     return arch_list


-def detect_hardware():
-    if is_hpu_available():
-        return "requirements-hpu.txt"
-    elif is_cuda_available():
-        return "requirements.txt"
-    else:
-        return "requirements-cpu.txt"
-
-
 UNSUPPORTED_COMPUTE_CAPABILITIES = ['3.5', '3.7', '5.0', '5.2', '5.3']

 if BUILD_CUDA_EXT:

@@ -229,23 +219,60 @@
             "auto_round_extension.*",
         ],
     ),
-    "install_requires": fetch_requirements(detect_hardware()),
+    "install_requires": fetch_requirements("requirements.txt"),
+    "extras_require": {
+        "gpu": fetch_requirements("requirements-gpu.txt"),
+        "cpu": fetch_requirements("requirements-cpu.txt"),
+    },
 }

+###############################################################################
+# Configuration for auto_round_lib
+# From pip:
+# pip install auto-round-lib
+# From source:
+# python setup.py lib install
+###############################################################################
+
+
+LIB_REQUIREMENTS_FILE = "requirements-lib.txt"
+LIB_INSTALL_CFG = {
+    "include_packages": find_packages(
+        include=[
+            "auto_round",
+            "auto_round.*",
+            "auto_round_extension",
+            "auto_round_extension.*",
+        ],
+    ),
+    "install_requires": fetch_requirements(LIB_REQUIREMENTS_FILE),
+}
+
 if __name__ == "__main__":
     # There are two ways to install hpu-only package:
-    # 1. pip install -vvv --no-build-isolation -e .[hpu]
-    # 2. Within the gaudi docker where the HPU is available, we install the hpu package by default.
+    # 1. python setup.py lib install
+    # 2. Within the gaudi docker where the HPU is available, we install the auto_round_lib by default.
+    is_user_requesting_library_build = "lib" in sys.argv
+    if is_user_requesting_library_build:
+        sys.argv.remove("lib")
+    should_build_library = is_user_requesting_library_build or BUILD_HPU_ONLY
+
+    if should_build_library:
+        package_name = "auto_round_lib"
+        INSTALL_CFG = LIB_INSTALL_CFG
+    else:
+        package_name = "auto_round"
+        INSTALL_CFG = PKG_INSTALL_CFG
+
-    include_packages = PKG_INSTALL_CFG.get("include_packages", {})
-    install_requires = PKG_INSTALL_CFG.get("install_requires", [])
-    extras_require = PKG_INSTALL_CFG.get("extras_require", {})
+    include_packages = INSTALL_CFG.get("include_packages", {})
+    install_requires = INSTALL_CFG.get("install_requires", [])
+    extras_require = INSTALL_CFG.get("extras_require", {})

     setup(
-        name="auto_round",
+        name=package_name,
         author="Intel AIPT Team",
         version=version,
-        author_email="[email protected], [email protected]",
+        author_email="[email protected], [email protected], [email protected]",
         description="Repository of AutoRound: Advanced Weight-Only Quantization Algorithm for LLMs",
         long_description=open("README.md", "r", encoding="utf-8").read(),
         long_description_content_type="text/markdown",
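
For readers unfamiliar with the pattern above: setup.py consumes a custom positional token ("lib") before setuptools parses the command line, then switches the package name and dependency set. A minimal, self-contained sketch of the same pattern — the names here are placeholders, not the real configuration:

```python
import sys

from setuptools import setup

# Sketch of the "lib" switch used in auto-round's setup.py: strip the custom
# token before setuptools sees argv, then pick a package variant.
build_lib_variant = "lib" in sys.argv
if build_lib_variant:
    sys.argv.remove("lib")  # setuptools would reject an unknown command

setup(
    # Placeholder names; the real project maps to auto_round / auto_round_lib.
    name="example_lib" if build_lib_variant else "example",
    version="0.1.0",
    install_requires=[],
)
```

Invoked as `python setup.py lib install` (or `bdist_wheel lib`, as in the HPU CI step above), the token flips the build to the library variant; without it, the default package is built.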