From 08a334074e826f5be0d45922fb8aa0f6f1bda988 Mon Sep 17 00:00:00 2001 From: Jing Fang Date: Wed, 31 Jul 2024 11:44:54 -0700 Subject: [PATCH] Fix quant_format argument for 4bit quantizer --- .../python/tools/quantization/matmul_4bits_quantizer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py index 40a4a4d26dc1c..cc8bd622df9b1 100644 --- a/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py +++ b/onnxruntime/python/tools/quantization/matmul_4bits_quantizer.py @@ -797,8 +797,8 @@ def parse_args(): parser.add_argument( "--quant_format", default="QOperator", - type=QuantFormat, - choices=list(QuantFormat), + type=str, + choices=["QOperator", "QDQ"], help="QuantFormat {QOperator, QDQ}" "QOperator format quantizes the model with quantized operators directly." "QDQ format quantize the model by inserting DeQuantizeLinear before the MatMul.", @@ -814,7 +814,7 @@ def parse_args(): input_model_path = args.input_model output_model_path = args.output_model - quant_format = args.quant_format + quant_format = QuantFormat[args.quant_format] if os.path.exists(output_model_path): logger.error(f"file {output_model_path} already exists")