Commit: Lyt/blockwise (#1441)
* [Algo] blockwise tuning

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] code update

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] sq argument update

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] log update

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] code update

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix bugs

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] log update

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] enable blockwise on Llama models

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] enable blockwise on Llama models

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] code update

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] format code

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] add ut

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix format issue

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] log update

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] move do_blockwise arg

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

* [Algo] fix bug

Signed-off-by: Lu, Yintong <[email protected]>

---------

Signed-off-by: Lu, Yintong <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
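The squashed commits above add blockwise SmoothQuant alpha auto-tuning. As a rough illustration of the idea only (the helper below and its names are hypothetical, not code from this commit): instead of scoring one alpha against end-to-end model output, each block searches its own alpha over the `alpha_min`/`alpha_max`/`alpha_step` grid and keeps the value that minimizes its local quantization error.

```python
# Hypothetical sketch of blockwise alpha auto-tuning, NOT the actual
# neural_compressor implementation: each block picks the alpha whose
# quantized output is closest to the FP32 output of that same block.

def search_block_alpha(block_fn, quant_fn, inputs,
                       alpha_min=0.0, alpha_max=1.0, alpha_step=0.1):
    """Return the alpha minimizing the block's squared output error.

    block_fn(x)      -- the FP32 block forward (assumed callable)
    quant_fn(x, a)   -- the quantized block forward under alpha `a`
    inputs           -- calibration inputs captured for this block
    """
    best_alpha, best_err = alpha_min, float("inf")
    alpha = alpha_min
    while alpha <= alpha_max + 1e-9:
        # Accumulate squared error between FP32 and quantized outputs.
        err = sum((block_fn(x) - quant_fn(x, alpha)) ** 2 for x in inputs)
        if err < best_err:
            best_alpha, best_err = alpha, err
        alpha = round(alpha + alpha_step, 10)  # avoid float drift
    return best_alpha
```

With a toy block where the quantization error vanishes at alpha 0.5, the search recovers that value; in the real algorithm the tuned per-block alphas feed the usual SmoothQuant scale folding.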
yintong-lu and pre-commit-ci[bot] authored Dec 13, 2023
1 parent 1f236d5 commit ee6bc28
Showing 7 changed files with 300 additions and 42 deletions.
9 changes: 8 additions & 1 deletion neural_compressor/adaptor/onnxrt.py

@@ -175,7 +175,13 @@ def smooth_quant(
         scales_per_op=True,
         record_max_info=False,
         weight_clip=True,
-        auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"},
+        auto_alpha_args={
+            "alpha_min": 0.0,
+            "alpha_max": 1.0,
+            "alpha_step": 0.1,
+            "shared_criterion": "mean",
+            "do_blockwise": False,
+        },
         default_alpha=0.5,
     ):
         """Get augmented model with smooth quant.
@@ -194,6 +200,7 @@ def smooth_quant(
             weight_clip: Whether to clip weight when calculating scales; by default it is on.
             auto_alpha_args: Hyperparameters used to set the alpha search space in SQ auto-tuning.
                              By default the search space is 0.0-1.0 with step_size 0.1.
+                             do_blockwise: Whether to do blockwise auto-tuning.
             default_alpha: A hyperparameter that is used in SQ auto-tuning; by default it is 0.5.

         Returns:
9 changes: 8 additions & 1 deletion neural_compressor/adaptor/pytorch.py

@@ -1737,7 +1737,13 @@ def smooth_quant(
         force_re_smooth=False,
         record_max_info=False,
         weight_clip=True,
-        auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"},
+        auto_alpha_args={
+            "alpha_min": 0.0,
+            "alpha_max": 1.0,
+            "alpha_step": 0.1,
+            "shared_criterion": "mean",
+            "do_blockwise": False,
+        },
         default_alpha=0.5,
     ):
         """Convert the model by smooth quant.
@@ -1756,6 +1762,7 @@ def smooth_quant(
             weight_clip: Whether to clip weight when calculating scales; by default it is on.
             auto_alpha_args: Hyperparameters used to set the alpha search space in SQ auto-tuning.
                              By default the search space is 0.0-1.0 with step_size 0.1.
+                             do_blockwise: Whether to do blockwise auto-tuning.
             default_alpha: A hyperparameter that is used in SQ auto-tuning; by default it is 0.5.

         Returns:
9 changes: 8 additions & 1 deletion neural_compressor/adaptor/tensorflow.py

@@ -1833,7 +1833,13 @@ def smooth_quant(
         scales_per_op=True,
         record_max_info=False,
         weight_clip=True,
-        auto_alpha_args={"alpha_min": 0.0, "alpha_max": 1.0, "alpha_step": 0.1, "shared_criterion": "mean"},
+        auto_alpha_args={
+            "alpha_min": 0.0,
+            "alpha_max": 1.0,
+            "alpha_step": 0.1,
+            "shared_criterion": "mean",
+            "do_blockwise": False,
+        },
         default_alpha=0.5,
     ):
         """Convert the model by smooth quant.
@@ -1852,6 +1858,7 @@ def smooth_quant(
             weight_clip: Whether to clip weight when calculating scales; by default it is on.
             auto_alpha_args: Hyperparameters used to set the alpha search space in SQ auto-tuning.
                              By default the search space is 0.0-1.0 with step_size 0.1.
+                             do_blockwise: Whether to do blockwise auto-tuning.
             default_alpha: A hyperparameter that is used in SQ auto-tuning; by default it is 0.5.

         Returns:
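Across all three adapters the new knob travels inside `auto_alpha_args`. A minimal sketch of the search grid these defaults imply (the `alpha_grid` helper below is illustrative, not part of neural_compressor):

```python
# The auto-tuner walks alphas from alpha_min to alpha_max in alpha_step
# increments; do_blockwise selects per-block rather than per-layer
# scoring. Set it to True to opt in to the new path.
auto_alpha_args = {
    "alpha_min": 0.0,
    "alpha_max": 1.0,
    "alpha_step": 0.1,
    "shared_criterion": "mean",
    "do_blockwise": True,
}

def alpha_grid(args):
    """Enumerate the candidate alphas implied by auto_alpha_args."""
    n = int(round((args["alpha_max"] - args["alpha_min"]) / args["alpha_step"]))
    # Round each value to dodge 0.1-step float accumulation error.
    return [round(args["alpha_min"] + i * args["alpha_step"], 10)
            for i in range(n + 1)]
```

With the defaults this yields the eleven candidates 0.0, 0.1, ..., 1.0 noted in the docstrings above.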