intel · chensuyue · May 20, 2024 · May 13, 2024 · May 13, 2024 · May 15, 2024
diff --git a/...i/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py b/...i/pytorch/nlp/huggingface_models/language-modeling/quantization/llm/run_clm_no_trainer.py
@@ -366,20 +366,11 @@ def run_fn(model):
 
         from utils import get_example_inputs
         example_inputs = get_example_inputs(user_model, calib_dataloader)
-        if args.sq:
-            # currently, smooth quant only support quantize API
-            # TODO: support prepare/convert API for smooth quant
-            from neural_compressor.torch.quantization import quantize
 
-            user_model = quantize(
-                model=user_model, quant_config=quant_config, example_inputs=example_inputs, run_fn=run_fn
-            )
-        else:
-            from neural_compressor.torch.quantization import prepare, convert
-
-            user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
-            run_fn(user_model)
-            user_model = convert(user_model)
+        from neural_compressor.torch.quantization import prepare, convert
+        user_model = prepare(model=user_model, quant_config=quant_config, example_inputs=example_inputs)
+        run_fn(user_model)
+        user_model = convert(user_model)
         user_model.save(args.output_dir)
 
 

diff --git a/neural_compressor/torch/algorithms/base_algorithm.py b/neural_compressor/torch/algorithms/base_algorithm.py
@@ -14,7 +14,7 @@
 
 from abc import ABC, abstractmethod
 from collections import OrderedDict
-from typing import Any, Optional
+from typing import Any, Callable, Optional
 
 import torch
 
@@ -76,7 +76,28 @@ def convert(self, model: torch.nn.Module, *args: Any, **kwargs: Any):
         """
         raise NotImplementedError("{} doesn't implement `convert` function. ".format(self.__class__.__name__))
 
-    def quantize(self, model: torch.nn.Module, *args: Any, **kwargs: Any):
+    @abstractmethod  # NOTE(review): the later `def quantize` in this class body (marked noqa: F811) rebinds the name, so this abstract version is replaced - confirm intent
+    def quantize(
+        self,
+        model: torch.nn.Module,
+        tune_cfg: OrderedDict,  # execute() passes self.quant_config here on its "recipe_cfgs" path
+        run_fn: Callable,  # execute() forwards kwargs.get("run_fn", None)
+        example_inputs: Any,  # execute() forwards kwargs.get("example_inputs", None)
+        inplace=True,  # execute() forwards kwargs.get("inplace", True)
+        *args,
+        **kwargs
+    ):
+        """Quantizes a given float model.
+
+        Args:
+            model (torch.nn.Module): The float model to be quantized.
+
+        Returns:
+            A quantized model.
+        """
+        raise NotImplementedError("{} doesn't implement `quantize` function. ".format(self.__class__.__name__))
+
+    def quantize(self, model: torch.nn.Module, *args: Any, **kwargs: Any):  # noqa: F811
         """Quantizes a given float model.
 
         Args:
@@ -111,5 +132,11 @@ def execute(self, model: torch.nn.Module, mode, *args: Any, **kwargs: Any):
         elif mode == Mode.CONVERT:
             model = self.convert(model, *args, **kwargs)
         elif mode == Mode.QUANTIZE:
-            model = self.quantize(model, *args, **kwargs)
+            if "recipe_cfgs" in self.quant_config:  # keep quantize API for smoothquant
+                run_fn = kwargs.get("run_fn", None)
+                example_inputs = kwargs.get("example_inputs", None)
+                inplace = kwargs.get("inplace", True)
+                model = self.quantize(model, self.quant_config, run_fn, example_inputs, inplace)
+            else:
+                model = self.quantize(model, *args, **kwargs)
         return model
diff --git a/neural_compressor/torch/algorithms/smooth_quant/__init__.py b/neural_compressor/torch/algorithms/smooth_quant/__init__.py
@@ -14,5 +14,5 @@
 # limitations under the License.
 
 from .utility import *
-from .smooth_quant import smooth_quantize
+from .smooth_quant import SmoothQuantQuantizer
 from .save_load import save, load, recover_model_from_json