From bf7b5cfb9ef0b5943ea38c41fd0c35f19c7a5fdb Mon Sep 17 00:00:00 2001
From: xinhe
Date: Mon, 25 Sep 2023 16:06:06 +0800
Subject: [PATCH] support falcon woq quantization (#1280)

Signed-off-by: Xin He
(cherry picked from commit 595d3a1987c77542cd8b904dd16dc1c95ba9bdc7)
---
 neural_compressor/adaptor/pytorch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 698dff63d04..dfba572c3b4 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -1213,6 +1213,7 @@ def _get_quantizable_ops(self, model):
             self.use_bf16
             and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1")
             and (self.version.release >= Version("1.11.0").release)
+            and self.approach != "post_training_weight_only"
         ):
             self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16")
             bf16_ops = []
@@ -4817,7 +4818,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         """
         module_dict = dict(model.named_modules())
         for op_name, child in module_dict.items():
-            if type(child) in self.white_list:
+            if isinstance(child, tuple(self.white_list)):
                 quantizable_ops.append((op_name, str(child.__class__.__name__)))
 
     @dump_elapsed_time("Pass query framework capability")
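
Note: the first hunk simply keeps bf16 op collection out of the post_training_weight_only
path. The isinstance() change in the second hunk is the functional fix: type(child) in
self.white_list only matches exact types, so subclasses of whitelisted modules (such as the
custom layers that models like Falcon define) were never reported as quantizable, while
isinstance() also accepts subclasses. Below is a minimal standalone sketch of the difference;
it assumes white_list contains torch.nn.Linear, and FalconLinear is a hypothetical stand-in
for a model-specific subclass, not an actual Falcon class name.

    # Sketch only: white_list is assumed to hold torch.nn.Linear;
    # FalconLinear is a hypothetical subclass standing in for a
    # model-specific layer that extends nn.Linear.
    import torch.nn as nn

    white_list = {nn.Linear}

    class FalconLinear(nn.Linear):
        pass

    child = FalconLinear(4, 4)

    print(type(child) in white_list)             # False: exact-type lookup misses subclasses
    print(isinstance(child, tuple(white_list)))  # True: isinstance() also matches subclasses

The tuple(...) conversion is required because isinstance() accepts a type or a tuple of
types, not a set or list.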