Remove version limitation of transformers for ort layer-wise (#1753)
Signed-off-by: yuwenzho <[email protected]>
yuwenzho authored Apr 26, 2024
1 parent f578b71 commit 522cfe3
Showing 3 changed files with 1 addition and 9 deletions.
8 changes: 0 additions & 8 deletions neural_compressor/onnxrt/algorithms/layer_wise/core.py
@@ -26,7 +26,6 @@
 import onnx
 import onnxruntime as ort
 import transformers
-from packaging.version import Version
 
 from neural_compressor.common import Logger
 from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader
@@ -59,13 +58,6 @@ def layer_wise_quant(
     Returns:
         _type_: _description_
     """
-    # TODO: remove the limitation for lwq
-    if Version(transformers.__version__) > Version("4.37.2"):
-        logger.warning(
-            "Model (such as llama-2) exported with transformers {} may fail in layer-wise quant. "
-            "we recommend downgrading transformers to 4.37.2 and try again.".format(transformers.__version__)
-        )
-
     # check whether model shape is inferred
     if not check_model_with_infer_shapes(model):
         logger.error(
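For context, the shape-inference check kept in this function means the input model should already have inferred shapes before layer-wise quantization runs. The following is a minimal sketch, not code from this commit, showing one way to satisfy that precondition with the stock onnx API; the file paths are illustrative assumptions.

import onnx

# Load the model, run ONNX shape inference, and save the inferred model.
# Very large models can use onnx.shape_inference.infer_shapes_path instead,
# which works on files and avoids loading the whole graph into memory.
model = onnx.load("model.onnx")
inferred_model = onnx.shape_inference.infer_shapes(model)
onnx.save(inferred_model, "model_inferred.onnx")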
1 change: 1 addition & 0 deletions requirements_ort.txt
@@ -5,3 +5,4 @@ onnxruntime-extensions
 psutil
 py-cpuinfo
 pydantic
+transformers
1 change: 0 additions & 1 deletion test/3x/onnxrt/requirements.txt
@@ -1,3 +1,2 @@
 optimum
 pytest
-transformers==4.37.2 # limitation for test_layer_wise
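With the 4.37.2 pin dropped from the test requirements, the layer-wise test can run against whichever transformers release is installed. Below is a minimal sketch, not part of this commit, of exporting a causal-LM checkpoint to ONNX via optimum (already listed in the test requirements); the checkpoint name and output directory are illustrative assumptions.

from optimum.onnxruntime import ORTModelForCausalLM

# Export a Hugging Face causal LM to ONNX using the installed transformers version.
ort_model = ORTModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # example checkpoint; any causal LM works
    export=True,                 # convert the PyTorch checkpoint to ONNX on the fly
)
ort_model.save_pretrained("llama2-onnx")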
