From bf7b5cfb9ef0b5943ea38c41fd0c35f19c7a5fdb Mon Sep 17 00:00:00 2001
From: xinhe
Date: Mon, 25 Sep 2023 16:06:06 +0800
Subject: [PATCH] support falcon woq quantization (#1280)

Signed-off-by: Xin He
(cherry picked from commit 595d3a1987c77542cd8b904dd16dc1c95ba9bdc7)
---
 neural_compressor/adaptor/pytorch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/neural_compressor/adaptor/pytorch.py b/neural_compressor/adaptor/pytorch.py
index 698dff63d04..dfba572c3b4 100644
--- a/neural_compressor/adaptor/pytorch.py
+++ b/neural_compressor/adaptor/pytorch.py
@@ -1213,6 +1213,7 @@ def _get_quantizable_ops(self, model):
             self.use_bf16
             and (CpuInfo().bf16 or os.getenv("FORCE_BF16") == "1")
             and (self.version.release >= Version("1.11.0").release)
+            and self.approach != "post_training_weight_only"
         ):
             self.bf16_ops = self.query_handler.get_op_types_by_precision("bf16")
             bf16_ops = []
@@ -4817,7 +4818,7 @@ def _get_quantizable_ops_recursively(self, model, prefix, quantizable_ops):
         """
         module_dict = dict(model.named_modules())
         for op_name, child in module_dict.items():
-            if type(child) in self.white_list:
+            if isinstance(child, tuple(self.white_list)):
                 quantizable_ops.append((op_name, str(child.__class__.__name__)))
 
     @dump_elapsed_time("Pass query framework capability")
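
Note: the first hunk simply keeps bf16 op collection out of the post_training_weight_only
path. The isinstance() change in the second hunk is the functional fix: type(child) in
self.white_list only matches exact types, so subclasses of whitelisted modules (such as the
custom layers that models like Falcon define) were never reported as quantizable, while
isinstance() also accepts subclasses. Below is a minimal standalone sketch of the difference;
it assumes white_list contains torch.nn.Linear, and FalconLinear is a hypothetical stand-in
for a model-specific subclass, not an actual Falcon class name.

    # Sketch only: white_list is assumed to hold torch.nn.Linear;
    # FalconLinear is a hypothetical subclass standing in for a
    # model-specific layer that extends nn.Linear.
    import torch.nn as nn

    white_list = {nn.Linear}

    class FalconLinear(nn.Linear):
        pass

    child = FalconLinear(4, 4)

    print(type(child) in white_list)             # False: exact-type lookup misses subclasses
    print(isinstance(child, tuple(white_list)))  # True: isinstance() also matches subclasses

The tuple(...) conversion is required because isinstance() accepts a type or a tuple of
types, not a set or list.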