Introduce INC3.0 auto-tune #1497

Merged
merged 34 commits, Jan 5, 2024

Commits (34)
c979dbf
add auto-tune
yiliu30 Dec 5, 2023
5ed7730
add docs
yiliu30 Dec 5, 2023
dbd287b
update
yiliu30 Dec 6, 2023
6f0cd9f
add objective for registering multiple objs
yiliu30 Dec 13, 2023
82eb3df
Merge branch 'master' into autotune
yiliu30 Dec 25, 2023
fdaed4d
rename runner to quantizer
yiliu30 Dec 25, 2023
3d38f69
add target manager
yiliu30 Dec 25, 2023
b754498
decompose the quantizer
yiliu30 Dec 25, 2023
843729a
update the runner to quantizer
yiliu30 Dec 25, 2023
9ad7b7e
add some UTs
yiliu30 Dec 25, 2023
35620e5
add some UTs
yiliu30 Dec 25, 2023
4b0f9dc
resolve conflicts
yiliu30 Dec 26, 2023
a398a9c
clean code
yiliu30 Dec 26, 2023
b51f7d3
update the docstring
yiliu30 Dec 26, 2023
7c16064
add fake get_default_tune_config
yiliu30 Dec 26, 2023
1447d41
Merge branch 'master' into autotune
yiliu30 Dec 29, 2023
d9a5afd
rename some classes
yiliu30 Dec 29, 2023
330b394
rename tuning_order to quant_configs
yiliu30 Dec 29, 2023
605f14f
rename tuning target to objective
yiliu30 Dec 29, 2023
22dc8dc
rename ut
yiliu30 Dec 29, 2023
4c6094f
add more UTs
yiliu30 Dec 29, 2023
118a826
fixed pylints
yiliu30 Dec 29, 2023
1efd18a
revert change
yiliu30 Dec 29, 2023
94d0773
rename some classes
yiliu30 Jan 2, 2024
8f397b7
remove the register eval_fn
yiliu30 Jan 2, 2024
df304e2
Merge branch 'master' into ly/autotune
yiliu30 Jan 2, 2024
5a70cd6
update docs
yiliu30 Jan 2, 2024
d08723d
refine code
yiliu30 Jan 2, 2024
756c7fd
rename algoWrapper to FWKWrapper
yiliu30 Jan 3, 2024
c4159e1
refactor the tuner
yiliu30 Jan 3, 2024
764dc50
rename tuning_objective to tuning_objectives
yiliu30 Jan 3, 2024
0c2157a
add UTs for tf
yiliu30 Jan 4, 2024
a35f72c
fix typos
yiliu30 Jan 5, 2024
7a56efa
Merge branch 'master' into ly/autotune
yiliu30 Jan 5, 2024
53 changes: 53 additions & 0 deletions neural_compressor/common/base_config.py
@@ -21,6 +21,8 @@
import re
from abc import ABC, abstractmethod
from collections import OrderedDict
from copy import deepcopy
from itertools import product
from typing import Any, Callable, Dict, List, Optional, Tuple, Union

from neural_compressor.common.logger import Logger
@@ -225,6 +227,57 @@ def __add__(self, other: BaseConfig) -> BaseConfig:
else:
return ComposableConfig(configs=[self, other])

def expand(self) -> List[BaseConfig]:
"""Expand the config.

case 1
{
"global": { "weight_bits": [4, 6]}
}
expands to:
1st trial config:
{
"global": { "weight_bits": 4}
}
2nd trial config:
{
"global": { "weight_bits": 6}
}
case 2
# TODO (Yi) to support the expansion of config with `local`
{
"global": {
"weight_bits": [4, 6]
},
"local":
{
"fc1":{
"weight_bits": [6, 8]
},
"fc2":{
"weight_bits": [4]
}
}

} -> ?
"""
config_list: List[BaseConfig] = []
params_list = self.params_list
params_dict = OrderedDict()
config = self
for param in params_list:
param_val = getattr(config, param)
# TODO (Yi) to handle param_val itself is a list
if isinstance(param_val, list):
params_dict[param] = param_val
else:
params_dict[param] = [param_val]
for params_values in product(*params_dict.values()):
new_config = self.__class__(**dict(zip(params_list, params_values)))
config_list.append(new_config)
logger.info(f"Expanded the {self.__class__.name} and got {len(config_list)} configs.")
return config_list

def _get_op_name_op_type_config(self):
op_type_config_dict = dict()
op_name_config_dict = dict()
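For illustration, a minimal sketch (not part of this diff) of the "case 1" expansion described in the docstring above. `RTNWeightQuantConfig` is imported the same way this PR's `torch/tune.py` does; the assumption here is that `weight_bits` appears in that config's `params_list`.

```python
from neural_compressor.torch.quantization.config import RTNWeightQuantConfig

# A list-valued tunable parameter contributes one axis to the Cartesian product,
# so a single two-value list should expand into two trial configs.
config = RTNWeightQuantConfig(weight_bits=[4, 8])
for trial_config in config.expand():
    print(trial_config.weight_bits)  # expected: 4, then 8
```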
159 changes: 159 additions & 0 deletions neural_compressor/common/base_tune.py
@@ -0,0 +1,159 @@
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from abc import abstractmethod
from typing import Any, Callable, Dict, List, Optional, Union

from neural_compressor.common.base_config import BaseConfig, ComposableConfig
from neural_compressor.common.logger import Logger

logger = Logger().get_logger()


class FrameworkWrapper:
"""Abstract base class for wrap framework's APIs.

FrameworkWrapper provides a uniform interface for encapsulating different framework's APIs.
This class is intended to be used by a `tuner` to obtain quantized models.
"""

def __init__(self, model) -> None:
self.model = model

@abstractmethod
def apply(self) -> Any:
"""The entry to apply algorithms on a given model."""
raise NotImplementedError


class TuningObjectives:
EVAL_FN = "eval_fn"
WEIGHT = "weight"
FN_NAME = "name"
EVAL_FN_TEMPLATE: Dict[str, Any] = {EVAL_FN: None, WEIGHT: 1.0, FN_NAME: None}

def __init__(self) -> None:
self.eval_fn_registry: List[Dict[str, Any]] = []

def evaluate(self, model) -> float:
"""Evaluate the model using registered evaluation functions.

Args:
model: The fp32 model or quantized model.

Returns:
The overall result of all registered evaluation functions.
"""
result = 0
for eval_pair in self.eval_fn_registry:
eval_fn = eval_pair[self.EVAL_FN]
eval_result = eval_fn(model)
result = self._update_the_objective_score(eval_pair, eval_result, result)
return result

def _update_the_objective_score(self, eval_pair, eval_result, overall_result) -> float:
# TODO update the result according to the weight and algo_name
return overall_result + eval_result * eval_pair[self.WEIGHT]

def get_number_of_tuning_objectives(self) -> int:
return len(self.eval_fn_registry)

def _set_eval_fn_registry(self, user_eval_fns: List[Dict]) -> None:
self.eval_fn_registry = [
{
self.EVAL_FN: user_eval_fn_pair[self.EVAL_FN],
self.WEIGHT: user_eval_fn_pair.get(self.WEIGHT, 1.0),
self.FN_NAME: user_eval_fn_pair.get(self.FN_NAME, user_eval_fn_pair[self.EVAL_FN].__name__),
}
for user_eval_fn_pair in user_eval_fns
]

def set_eval_fn_registry(self, eval_fns: Optional[Union[Dict, List[Dict]]] = None) -> None:
if eval_fns is None:
return
elif isinstance(eval_fns, Dict):
eval_fns = [eval_fns]
elif isinstance(eval_fns, List):
assert all([isinstance(eval_fn_pair, Dict) for eval_fn_pair in eval_fns])
else:
raise NotImplementedError(f"The eval_fns should be a dict or a list of dict, but got {type(eval_fns)}.")
self._set_eval_fn_registry(eval_fns)


tuning_objectives = TuningObjectives()


class BaseTuningConfig:
"""Base Class for Tuning Criterion.

Args:
quant_configs: quantization configs. Default value is empty.
timeout: Tuning timeout (seconds). Default value is 0, which means early stop.
max_trials: Max number of tuning trials. Default value is 100. Combined with the timeout field to decide when to exit.
"""

def __init__(self, quant_configs=None, timeout=0, max_trials=100) -> None:
"""Init a TuningCriterion object."""
self.quant_configs = quant_configs
self.timeout = timeout
self.max_trials = max_trials


class Tuner:
def __init__(
self, tune_config: BaseTuningConfig, tuning_objectives: TuningObjectives, fwk_wrapper: FrameworkWrapper
) -> None:
self.tune_config = tune_config
self.tuning_objectives = tuning_objectives
self.fwk_wrapper = fwk_wrapper
self._post_init()

def _post_init(self) -> None:
# check the number of evaluation functions
num_tuning_objectives = self.tuning_objectives.get_number_of_tuning_objectives()
assert (
num_tuning_objectives > 0
), "Please ensure that you register at least one evaluation metric for auto-tune."
logger.info(f"There are {num_tuning_objectives} tuning objectives.")

@staticmethod
def parse_quant_config(quant_config: BaseConfig) -> List[BaseConfig]:
if isinstance(quant_config, ComposableConfig):
result = []
for q_config in quant_config.config_list:
result += q_config.expand()
return result
else:
return quant_config.expand()

def parse_quant_configs(self) -> List[BaseConfig]:
quant_config_list = []
for quant_config in self.tune_config.quant_configs:
quant_config_list.extend(Tuner.parse_quant_config(quant_config))
return quant_config_list

def get_best_model(self, q_model, objective_score: Union[float, int]) -> Any:
# TODO(Yi) enable it at the next PR
pass

def get_tuning_objective_score(self, model) -> float:
eval_result = self.tuning_objectives.evaluate(model)
return eval_result

def search(self) -> Any:
for config in self.parse_quant_configs():
logger.info(f"config {config}")
q_model = self.fwk_wrapper.apply(quant_config=config)
if self.get_best_model(q_model, self.get_tuning_objective_score(q_model)):
return q_model
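For illustration, a minimal sketch (not part of this diff) of how evaluation functions might be registered with the module-level `tuning_objectives` singleton defined above. `eval_acc`, `eval_perf`, and the weights are made-up placeholders; the `"eval_fn"`/`"weight"`/`"name"` keys come from `TuningObjectives`.

```python
from neural_compressor.common.base_tune import tuning_objectives

def eval_acc(model) -> float:
    return 0.80  # placeholder accuracy metric

def eval_perf(model) -> float:
    return 0.50  # placeholder performance metric

tuning_objectives.set_eval_fn_registry(
    [
        {"eval_fn": eval_acc, "weight": 0.7, "name": "accuracy"},
        {"eval_fn": eval_perf, "weight": 0.3},  # "name" falls back to eval_perf.__name__
    ]
)
# evaluate() sums each score scaled by its weight: 0.7 * 0.80 + 0.3 * 0.50 = 0.71
print(tuning_objectives.evaluate(model=None))
```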
2 changes: 2 additions & 0 deletions neural_compressor/torch/__init__.py
@@ -22,3 +22,5 @@
GPTQConfig,
get_default_gptq_config,
)

from neural_compressor.torch.tune import autotune, TuningConfig, get_default_tune_config
2 changes: 1 addition & 1 deletion neural_compressor/torch/quantization/quantize.py
@@ -54,7 +54,7 @@ def quantize(
else:
assert isinstance(
quant_config, BaseConfig
), "Please pass a dict or config instance as the quantization configuration."
), f"Please pass a dict or config instance as the quantization configuration, but got {type(quant_config)}."
logger.info(f"Quantize model with config: \n {quant_config.to_json_string()} \n")
# select quantization algo according to config

71 changes: 71 additions & 0 deletions neural_compressor/torch/tune.py
@@ -0,0 +1,71 @@
# Copyright (c) 2023 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Callable, Dict, List, Optional, Tuple, Union

import torch

from neural_compressor.common.base_tune import BaseTuningConfig, FrameworkWrapper, Tuner, tuning_objectives
from neural_compressor.common.logger import Logger
from neural_compressor.torch.quantization.config import GPTQConfig, RTNWeightQuantConfig

logger = Logger().get_logger()


def get_default_tuning_config():
# TODO (Yi) support it in the next PR
return None


class TorchWrapper(FrameworkWrapper):
"""Concrete implementation of `FrameworkWrapper` for PyTorch models."""

def __init__(
self, model: torch.nn.Module, run_fn: Optional[Callable] = None, run_args: Optional[Tuple] = None
) -> None:
super().__init__(model)
self.run_fn = run_fn
self.run_args = run_args

def apply(self, quant_config):
"""The entry to apply quantization algorithms on a given a model."""
logger.info(f"apply quant_config: {quant_config}.")
from neural_compressor.torch import quantize

q_model = quantize(model=self.model, quant_config=quant_config, run_fn=self.run_fn, run_args=self.run_args)
return q_model


class TuningConfig(BaseTuningConfig):
def __init__(self, quant_configs=None, timeout=0, max_trials=100):
super().__init__(quant_configs, timeout, max_trials)


def autotune(
model: torch.nn.Module,
tune_config: TuningConfig,
eval_fns: Optional[Union[Dict, List[Dict]]] = None,
run_fn=None,
run_args=None,
):
tuning_objectives.set_eval_fn_registry(eval_fns)
torch_wrapper = TorchWrapper(model, run_fn, run_args)
tuner = Tuner(tune_config=tune_config, tuning_objectives=tuning_objectives, fwk_wrapper=torch_wrapper)
best_qmodel = tuner.search()
return best_qmodel


def get_default_tune_config():
# TODO use the registered default tuning config in the next PR
return TuningConfig(quant_configs=[GPTQConfig(weight_bits=[4, 8]), RTNWeightQuantConfig(weight_bits=[4, 8])])
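For illustration, a minimal end-to-end sketch (not part of this diff) of the new `autotune` entry point. The toy model and the constant metric are placeholders; `autotune`, `TuningConfig`, and `RTNWeightQuantConfig` are names used by this PR. Note that `Tuner.get_best_model` is still a TODO above, so `search()` does not yet return the best model in this PR.

```python
import torch

from neural_compressor.torch import TuningConfig, autotune
from neural_compressor.torch.quantization.config import RTNWeightQuantConfig

# Toy model and placeholder metric; replace with a real model and evaluation.
model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.ReLU())

def eval_acc(q_model) -> float:
    return 1.0  # placeholder score used to rank trial configs

tune_config = TuningConfig(quant_configs=[RTNWeightQuantConfig(weight_bits=[4, 8])])
best_qmodel = autotune(model=model, tune_config=tune_config, eval_fns={"eval_fn": eval_acc})
```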
19 changes: 19 additions & 0 deletions test/3x/tensorflow/test_config.py
@@ -297,5 +297,24 @@ def test_config_to_dict(self):
self.assertIn("local", config_dict)


class TestQuantConfigForAutotune(unittest.TestCase):
def test_expand_config(self):
# test the expand functionality; the user is not aware of it
from neural_compressor.tensorflow import StaticQuantConfig

quant_configs = StaticQuantConfig(
weight_dtype="int8",
weight_sym=True,
weight_granularity=["per_channel", "per_tensor"],
act_dtype="int8",
act_sym=True,
act_granularity="per_channel",
)

expand_config_list = StaticQuantConfig.expand(quant_configs)
self.assertEqual(expand_config_list[0].weight_granularity, "per_channel")
self.assertEqual(expand_config_list[1].weight_granularity, "per_tensor")


if __name__ == "__main__":
unittest.main()