Enable the tuning of WOQ algorithm #1328

Merged 22 commits on Oct 20, 2023
Commits
cb696fb
support WOQ algo tuning
Kaihui-intel Oct 17, 2023
1bb5d85
add WoqTuningParams docstring
Kaihui-intel Oct 17, 2023
84f4bc4
Merge branch 'kaihui/wo_tuning' of https://github.com/intel/neural-co…
Kaihui-intel Oct 17, 2023
9afdc8c
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 17, 2023
25df777
Merge branch 'kaihui/wo_tuning' of https://github.com/intel/neural-co…
Kaihui-intel Oct 17, 2023
e5119ba
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 17, 2023
d0b1d4a
fix docstring
Kaihui-intel Oct 17, 2023
5f8cda8
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 17, 2023
073f2cb
support woq to auto & add ut
Kaihui-intel Oct 18, 2023
f9b7d43
Merge branch 'kaihui/wo_tuning' of https://github.com/intel/neural-co…
Kaihui-intel Oct 18, 2023
223f9eb
add docstring for util
Kaihui-intel Oct 18, 2023
1bc7c62
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 18, 2023
1b8ac4a
Merge branch 'kaihui/wo_tuning' of https://github.com/intel/neural-co…
Kaihui-intel Oct 18, 2023
0aa5321
support woq tuning for onnxrt
yuwenzho Oct 19, 2023
ce2dac0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2023
d87ada6
update UT of onnxrt woq tuning
yuwenzho Oct 19, 2023
b411551
Merge branch 'master' into kaihui/wo_tuning
yuwenzho Oct 19, 2023
9e5537f
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2023
1369c7f
update dataloader check for onnxrt woq
yuwenzho Oct 19, 2023
e95fb8b
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Oct 19, 2023
8ba8272
Merge branch 'master' into kaihui/wo_tuning
yuwenzho Oct 20, 2023
b409d1c
remove recipes from woq docstring
Kaihui-intel Oct 20, 2023
8 changes: 6 additions & 2 deletions neural_compressor/adaptor/ox_utils/weight_only.py
@@ -244,7 +244,7 @@ def rtn_quantize(

dtype = weight.dtype

if node.name in weight_config:
if node.name in weight_config and "group_size" in weight_config[node.name]:
num_bits = weight_config[node.name]["bits"]
group_size = weight_config[node.name]["group_size"]
scheme = weight_config[node.name]["scheme"]
@@ -328,7 +328,11 @@ def apply_awq_scale(model, weight_config, absorb_pairs, output_dicts, num_bits,
weight = []
org_out = []
for node in nodes:
if node.name in weight_config and weight_config.get(node.name, "fp32") != "fp32":
if (
node.name in weight_config
and weight_config.get(node.name, "fp32") != "fp32"
and "group_size" in weight_config[node.name]
):
num_bits = weight_config[node.name]["bits"]
group_size = weight_config[node.name]["group_size"]
scheme = weight_config[node.name]["scheme"]
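For reference, the new "group_size" checks guard against per-node entries that carry only a precision string rather than full WOQ settings. A minimal sketch of the weight_config shape these branches expect (node names are hypothetical):

# Hypothetical weight_config as consumed by rtn_quantize / apply_awq_scale above:
# a node entry is either the string "fp32" (keep the op in fp32) or a dict with
# the full WOQ settings; entries without "group_size" are now skipped.
weight_config = {
    "/layers.0/attn/MatMul": {"bits": 4, "group_size": 32, "scheme": "sym"},  # quantized
    "/lm_head/MatMul": "fp32",  # kept in fp32, no group_size present
}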
6 changes: 6 additions & 0 deletions neural_compressor/strategy/auto.py
@@ -120,6 +120,12 @@ def next_tune_cfg(self):
op_tuning_cfg["calib_sampling_size"] = calib_sampling_size_lst[0]
if not self.cur_best_tuning_cfg:
self.cur_best_tuning_cfg = deepcopy(op_tuning_cfg)

# try to tune a WeightOnlyQuant algorithm
if self._should_tuning_woq_algo():
for tune_cfg in self.tuning_woq_algo(tuning_space, deepcopy(self.cur_best_tuning_cfg)):
yield tune_cfg

# try to tune sq alpha
if self._should_tuning_sq_alpha(self.config.recipes):
for tune_cfg in self.tuning_sq_alpha(tuning_space, deepcopy(self.cur_best_tuning_cfg), self.config.recipes):
6 changes: 6 additions & 0 deletions neural_compressor/strategy/basic.py
@@ -312,6 +312,12 @@ def next_tune_cfg(self):
stage1_max = 1e9 # TODO set a more appropriate value
if not self.cur_best_tuning_cfg:
self.cur_best_tuning_cfg = deepcopy(initial_op_tuning_cfg)

# try to tune a WeightOnlyQuant algorithm
if self._should_tuning_woq_algo():
for tune_cfg in self.tuning_woq_algo(tuning_space, deepcopy(self.cur_best_tuning_cfg)):
yield tune_cfg

# try to tune sq alpha
if self._should_tuning_sq_alpha(self.config.recipes):
for tune_cfg in self.tuning_sq_alpha(
38 changes: 37 additions & 1 deletion neural_compressor/strategy/strategy.py
@@ -59,7 +59,7 @@
from .utils.tuning_sampler import tuning_sampler_dict
from .utils.tuning_space import TuningSpace
from .utils.tuning_structs import OpTuningConfig
from .utils.utility import build_slave_faker_model, quant_options
from .utils.utility import build_slave_faker_model, check_key_exist, quant_options

STRATEGIES = {}

@@ -1153,6 +1153,41 @@ def tuning_sq_alpha(self, tuning_space, tuning_cfg, recipes):
for tune_cfg in sq_sampler:
yield tune_cfg

def _should_tuning_woq_algo(self):
"""Currently, it's only available for the ORT backend with approach is weight_only.

It will be triggered when
a) quant_level is auto or quant_level is 1 && strategy is basic
b) and the "algorithm" is not set in op_type_dict
c) and woq will only trigger once
"""
return (
"onnx" in self.framework.lower()
and "weight_only" in self.config.approach
and not check_key_exist(self.config.op_type_dict, "algorithm")
and not check_key_exist(self.tuning_history, "woq_tuning_cfg")
)
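As a usage sketch, a configuration that would satisfy these trigger conditions might look as follows (a minimal sketch assuming the standard PostTrainingQuantConfig / quantization.fit entry points; the model path, dataloader, and eval_func are placeholders):

from neural_compressor import PostTrainingQuantConfig, quantization

# ONNX Runtime backend + weight_only approach, with no "algorithm" pinned in
# op_type_dict and no earlier "woq_tuning_cfg" in the tuning history, so
# _should_tuning_woq_algo() evaluates to True and the WOQ algorithms are tuned.
conf = PostTrainingQuantConfig(approach="weight_only")
q_model = quantization.fit(
    "model.onnx",                  # placeholder path to an ONNX model
    conf,
    calib_dataloader=dataloader,   # user-supplied calibration dataloader
    eval_func=eval_func,           # user-supplied accuracy function
)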

def tuning_woq_algo(self, tuning_space, tuning_cfg):
"""Tuning smooth quant's alpha.
yuwenzho marked this conversation as resolved.
Show resolved Hide resolved

Args:
tuning_space: tuning space
tuning_cfg: the initial tuning config

Yields:
tuning config
"""
logger.info("[STRATEGY] Start tuning Weight Only Quant' algo.")
woq_sampler = tuning_sampler_dict.get_class("woq_algorithm")(tuning_space, [], tuning_cfg)
for tune_cfg in woq_sampler:
yield tune_cfg

logger.info(
"[Strategy] The best tuning config with WeightOnlyQuant is" f"{self.cur_best_tuning_cfg['woq_tuning_cfg']}."
)

def initial_dynamic_cfg_based_on_static_cfg(self, op_static_cfg: OpTuningConfig):
"""Init the dynamic tuning config according to the static config.

@@ -1322,6 +1357,7 @@ def _tune_cfg_converter(self, op_tuning_cfg):
# For not tuning recipe, tune cfg use it directly
tune_cfg["recipe_cfgs"].update(self._not_tuning_recipes_values)
tune_cfg["trial_number"] = deepcopy(self.trials_count)
tune_cfg.setdefault("woq_tuning_cfg", op_tuning_cfg.get("woq_tuning_cfg"))
# The sq-related args comes from user config, current best tuning config
# TODO simplify the logic for transforming the arguments
# update the sq-related args from self.cur_best_tuning_cfg
25 changes: 25 additions & 0 deletions neural_compressor/strategy/utils/constant.py
@@ -16,6 +16,8 @@
# limitations under the License.
"""Strategy constant."""

from enum import Enum

PRECISION_LIST = ["bf16", "fp16", "fp32"]
QUANT_MODE_SET = {"static", "dynamic"}
LOWER_BIT_LIST = ["int4"]
@@ -56,3 +58,26 @@
"last_conv_or_matmul_quantization",
"pre_post_process_quantization",
}


class WoqTuningParams(Enum):
"""This enumeration class represents the different tuning parameters for the weight only quant (WOQ) algorithm.

Args:
Enum (Enum): base enumeration class

Attributes:
RTN (int): Represents the RTN algorithm, which is a type of WOQ algorithm.
GPTQ (int): Represents the GPTQ algorithm, which is a type of WOQ algorithm.
GPTQ_DISABLE_LAST_MATMUL (int): Represents the GPTQ algorithm with the last matrix multiplication disabled.
GPTQ_GROUP_SIZE_32 (int): Represents the GPTQ algorithm with a group size of 32.
GPTQ_GROUP_SIZE_128 (int): Represents the GPTQ algorithm with a group size of 128.
AWQ (int): Represents the AWQ algorithm, which is a type of WOQ algorithm.
"""

RTN = 1
GPTQ = 2
GPTQ_DISABLE_LAST_MATMUL = 3
GPTQ_GROUP_SIZE_32 = 4
GPTQ_GROUP_SIZE_128 = 5
AWQ = 6
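Illustratively, each member corresponds to one WOQ trial. A hypothetical summary of the knobs a trial would exercise (the knob names are assumptions; the adaptor-side interpretation is outside this diff):

# Hypothetical mapping; only the group sizes and the disabled last MatMul
# are stated by the enum itself.
WOQ_TRIAL_SETTINGS = {
    WoqTuningParams.RTN: {"algorithm": "RTN"},
    WoqTuningParams.GPTQ: {"algorithm": "GPTQ"},
    WoqTuningParams.GPTQ_DISABLE_LAST_MATMUL: {"algorithm": "GPTQ", "last_matmul": "fp32"},
    WoqTuningParams.GPTQ_GROUP_SIZE_32: {"algorithm": "GPTQ", "group_size": 32},
    WoqTuningParams.GPTQ_GROUP_SIZE_128: {"algorithm": "GPTQ", "group_size": 128},
    WoqTuningParams.AWQ: {"algorithm": "AWQ"},
}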
32 changes: 32 additions & 0 deletions neural_compressor/strategy/utils/tuning_sampler.py
@@ -23,6 +23,7 @@
from typing import Any, Dict, List, Tuple, Union

from ...utils import logger
from ..utils.constant import WoqTuningParams
from .tuning_space import TuningSpace, pattern_to_internal, quant_mode_from_pattern
from .tuning_structs import OpTuningConfig
from .utility import ClassRegister
@@ -609,3 +610,34 @@ def __iter__(self):
recipe_cfgs["smooth_quant_args"] = {"alpha": alpha}
logger.debug(f"[STRATEGY] set smooth quant alpha with: {alpha:.4f}")
yield new_tune_cfg


@tuning_sampler_dict("woq_algorithm")
class WeightOnlyQuantSampler(TuningSampler):
"""Not displayed in API Docs."""

def __init__(
self,
tuning_space: TuningSpace,
tuning_order_lst: List[TuningOrder],
initial_op_tuning_cfg: Dict,
):
"""Init tuning sampler.

Args:
tuning_space: The tuning space.
tuning_order_lst: The traverse orders.
initial_op_tuning_cfg: The initialized tuning config.
"""
super().__init__(tuning_space, tuning_order_lst, initial_op_tuning_cfg)

def __iter__(self):
"""Yield the next tuning config.

Yields:
The next tuning config.
"""
new_tune_cfg = copy.deepcopy(self.initial_op_tuning_cfg)
for algo in WoqTuningParams:
new_tune_cfg["woq_tuning_cfg"] = algo
yield new_tune_cfg
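A brief usage sketch of the new sampler (tuning_space and initial_op_tuning_cfg are assumed to be already built by the strategy):

from neural_compressor.strategy.utils.constant import WoqTuningParams
from neural_compressor.strategy.utils.tuning_sampler import tuning_sampler_dict

# The sampler yields one trial per WoqTuningParams member, reusing a single deep
# copy of the initial config and updating its "woq_tuning_cfg" field each time.
sampler_cls = tuning_sampler_dict.get_class("woq_algorithm")
for tune_cfg in sampler_cls(tuning_space, [], initial_op_tuning_cfg):
    assert isinstance(tune_cfg["woq_tuning_cfg"], WoqTuningParams)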
31 changes: 31 additions & 0 deletions neural_compressor/strategy/utils/utility.py
@@ -126,6 +126,37 @@ def get_adaptor_name(adaptor):
return ""


def check_key_exist(data, key):
"""Recursively checks if a key exists in a dictionary or list.

Args:
data (dict or list): The dictionary or list to search.
key (any): The key to search for.

Returns:
bool: True if the key exists in the data structure, False otherwise.

Examples:
>>> check_key_exist({'a': 1, 'b': {'c': 2}}, 'c')
True
>>> check_key_exist([{'a': 1}, {'b': 2}], 'b')
True
>>> check_key_exist({'a': 1, 'b': [1, 2, 3]}, 'c')
False
"""
if isinstance(data, dict):
if key in data:
return True
for value in data.values():
if check_key_exist(value, key):
return True
elif isinstance(data, list):
for item in data:
if check_key_exist(item, key):
return True
return False


def build_slave_faker_model():
"""Slave does not have a model, so construct a fake model.
