Remove version limitation of transformers for ort layer-wise (#1753)
Signed-off-by: yuwenzho <[email protected]>
yuwenzho authored Apr 26, 2024
1 parent f578b71 commit 522cfe3
Showing 3 changed files with 1 addition and 9 deletions.
8 changes: 0 additions & 8 deletions neural_compressor/onnxrt/algorithms/layer_wise/core.py
@@ -26,7 +26,6 @@
 import onnx
 import onnxruntime as ort
 import transformers
-from packaging.version import Version
 
 from neural_compressor.common import Logger
 from neural_compressor.onnxrt.quantization.calibrate import CalibrationDataReader
@@ -59,13 +58,6 @@ def layer_wise_quant(
     Returns:
         _type_: _description_
     """
-    # TODO: remove the limitation for lwq
-    if Version(transformers.__version__) > Version("4.37.2"):
-        logger.warning(
-            "Model (such as llama-2) exported with transformers {} may fail in layer-wise quant. "
-            "we recommend downgrading transformers to 4.37.2 and try again.".format(transformers.__version__)
-        )
-
     # check whether model shape is inferred
     if not check_model_with_infer_shapes(model):
         logger.error(
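For context, the shape-inference check kept in this function means the input model should already have inferred shapes before layer-wise quantization runs. The following is a minimal sketch, not code from this commit, showing one way to satisfy that precondition with the stock onnx API; the file paths are illustrative assumptions.

import onnx

# Load the model, run ONNX shape inference, and save the inferred model.
# Very large models can use onnx.shape_inference.infer_shapes_path instead,
# which works on files and avoids loading the whole graph into memory.
model = onnx.load("model.onnx")
inferred_model = onnx.shape_inference.infer_shapes(model)
onnx.save(inferred_model, "model_inferred.onnx")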
1 change: 1 addition & 0 deletions requirements_ort.txt
@@ -5,3 +5,4 @@ onnxruntime-extensions
 psutil
 py-cpuinfo
 pydantic
+transformers
1 change: 0 additions & 1 deletion test/3x/onnxrt/requirements.txt
@@ -1,3 +1,2 @@
 optimum
 pytest
-transformers==4.37.2 # limitation for test_layer_wise
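With the 4.37.2 pin dropped from the test requirements, the layer-wise test can run against whichever transformers release is installed. Below is a minimal sketch, not part of this commit, of exporting a causal-LM checkpoint to ONNX via optimum (already listed in the test requirements); the checkpoint name and output directory are illustrative assumptions.

from optimum.onnxruntime import ORTModelForCausalLM

# Export a Hugging Face causal LM to ONNX using the installed transformers version.
ort_model = ORTModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # example checkpoint; any causal LM works
    export=True,                 # convert the PyTorch checkpoint to ONNX on the fly
)
ort_model.save_pretrained("llama2-onnx")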
