diff --git a/neural_compressor/adaptor/tensorflow.py b/neural_compressor/adaptor/tensorflow.py index 094795b37b7..b28cf65175f 100644 --- a/neural_compressor/adaptor/tensorflow.py +++ b/neural_compressor/adaptor/tensorflow.py @@ -190,7 +190,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, post callbacks = kwargs["kwargs"].get("callbacks", None) execution_mode = kwargs["kwargs"].get("execution_mode", None) distributed = getattr(dataloader, "distributed", False) - from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLoss + from neural_compressor.compression.distillation.criterions import TensorflowKnowledgeDistillationLoss if isinstance(criterion, TensorflowKnowledgeDistillationLoss): input_model = model._model @@ -1757,8 +1757,8 @@ def _get_mse_order( def _partial_dataset_of(self, dataloader, confidence_batches): """Partial dataset.""" + from neural_compressor.data.datasets.dummy_dataset import DummyDataset from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x - from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDataset_v2_x): assert isinstance(confidence_batches, int) diff --git a/neural_compressor/compression/distillation/criterions.py b/neural_compressor/compression/distillation/criterions.py index 745260699a2..7c9b4896ae4 100644 --- a/neural_compressor/compression/distillation/criterions.py +++ b/neural_compressor/compression/distillation/criterions.py @@ -17,6 +17,8 @@ """Initialize critetion classes. Classes includes: + TensorFlowCrossEntropyLoss, PyTorchCrossEntropyLoss, + TensorFlowSparseCategoricalCrossentropy, TensorflowKnowledgeDistillationLoss, PyTorchKnowledgeDistillationLoss, PyTorchIntermediateLayersKnowledgeDistillationLoss. """ @@ -91,7 +93,12 @@ def __getitem__(self, criterion_type): Returns: cls: criterion class. """ - assert criterion_type in self.criterions.keys(), "only support criterions in {}".format(self.criterions.keys()) + assert ( + criterion_type in self.criterions.keys() + ), "only support criterions in {} \ + , but got criterion type {}".format( + self.criterions.keys(), criterion_type + ) return self.criterions[criterion_type] @@ -130,6 +137,119 @@ def decorator_criterion(cls): return decorator_criterion +@criterion_registry("CrossEntropyLoss", "tensorflow") +class TensorFlowCrossEntropyLoss(object): + """TensorFlow CrossEntropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the Datasets class. + + Args: + param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the TensorFlowCrossEntropyLoss. + + Returns: + cls: criterion class. 
+ param_dict(dict): param_dict + """ + return tf.keras.losses.CategoricalCrossentropy, self._mapping() + + +@criterion_registry("SparseCategoricalCrossentropy", "tensorflow") +class TensorFlowSparseCategoricalCrossentropy(object): + """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the Datasets class. + + Args: + param_dict (string): param_dict. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the TensorFlowSparseCategoricalCrossentropy. + + Returns: + cls: criterion class. + param_dict(dict): param_dict + """ + return tf.keras.losses.SparseCategoricalCrossentropy, self._mapping() + + +@criterion_registry("CrossEntropyLoss", "pytorch") +class PyTorchCrossEntropyLoss(object): + """PyTorch CrossEntropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the PyTorchCrossEntropyLoss class. + + Args: + param_dict (string): param_dict. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "none", + "mean", + "sum", + ], "Supported reduction value is none, mean, sum" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the PyTorchCrossEntropyLoss. + + Returns: + cls: criterion class. 
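The criterion wrappers added above all follow one contract: they are constructed with the user's param_dict, and calling them returns the framework loss class together with the keyword arguments produced by _mapping(). A minimal sketch of how a registered criterion is resolved, assuming the 2.x Criterions entry class keeps the framework-keyed lookup shown in __getitem__ (the parameter values below are illustrative, not part of this patch):

from neural_compressor.compression.distillation.criterions import Criterions

# Resolve the registered wrapper for a framework/name pair; an unknown name now
# fails with the more informative assertion message introduced in __getitem__.
wrapper_cls = Criterions("pytorch")["CrossEntropyLoss"]

# The wrapper validates the param_dict and translates it into framework kwargs.
loss_cls, loss_kwargs = wrapper_cls({"reduction": "mean"})()
criterion = loss_cls(**loss_kwargs)  # torch.nn.CrossEntropyLoss(reduction="mean")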
+ param_dict(dict): param_dict + """ + return torch.nn.CrossEntropyLoss, self._mapping() + + class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -916,7 +1036,7 @@ def register_hooks_for_models(self): Raises: AttributeError: AttributeError """ - from neural_compressor.experimental.common import torch_utils + from neural_compressor.compression.distillation import utility def register_model_forward_hook(model, path, output_process="", student=False): module = model @@ -927,7 +1047,7 @@ def register_model_forward_hook(model, path, output_process="", student=False): module = module.__getattr__(node) except: raise AttributeError("There is no path {} in the model.".format(path)) - return module.register_forward_hook(torch_utils.get_activation(path, output_process, student)) + return module.register_forward_hook(utility.get_activation(path, output_process, student)) assert isinstance(self.student_model, torch.nn.Module) and isinstance(self.teacher_model, torch.nn.Module), ( "Expect student_model and teacher_model to be an torch.nn.Module object, " @@ -939,8 +1059,8 @@ def register_model_forward_hook(model, path, output_process="", student=False): student_output_process, teacher_output_process = self.layer_output_process[idx] st_handle = register_model_forward_hook(self.student_model, student_layer, student_output_process, True) te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, teacher_output_process) - torch_utils.STUDENT_FEATURES = self.student_features - torch_utils.TEACHER_FEATURES = self.teacher_features + utility.STUDENT_FEATURES = self.student_features + utility.TEACHER_FEATURES = self.teacher_features self.hook_handles.extend([st_handle, te_handle]) def remove_all_hooks(self): diff --git a/neural_compressor/compression/distillation/optimizers.py b/neural_compressor/compression/distillation/optimizers.py new file mode 100644 index 00000000000..09475832497 --- /dev/null +++ b/neural_compressor/compression/distillation/optimizers.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Intel Neural Compressor built-in Optimizers on multiple framework backends.""" + +from abc import abstractmethod + +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +tfa = LazyImport("tensorflow_addons") + + +@singleton +class TensorflowOptimizers(object): + """Class to get all registered TensorFlow Optimizers once only.""" + + def __init__(self): + """Initialize `TensorflowOptimizers` class once only.""" + self.optimizers = {} + self.optimizers.update(TENSORFLOW_OPTIMIZERS) + + +@singleton +class PyTorchOptimizers(object): + """Class to get all registered PyTorch Optimizers once only.""" + + def __init__(self): + """Initialize `PyTorchOptimizers` class once only.""" + self.optimizers = {} + self.optimizers.update(PYTORCH_OPTIMIZERS) + + +framework_optimizers = { + "tensorflow": TensorflowOptimizers, + "pytorch": PyTorchOptimizers, + "pytorch_fx": PyTorchOptimizers, +} + +# user/model specific optimizers will be registered here +TENSORFLOW_OPTIMIZERS = {} +PYTORCH_OPTIMIZERS = {} + +registry_optimizers = { + "tensorflow": TENSORFLOW_OPTIMIZERS, + "pytorch": PYTORCH_OPTIMIZERS, + "pytorch_fx": PYTORCH_OPTIMIZERS, +} + + +class Optimizers(object): + """Main entry to get the specific type of optimizer.""" + + def __init__(self, framework): + """Initialize `Optimizers` class.""" + assert framework in ("tensorflow", "pytorch", "pytorch_fx"), "framework support tensorflow pytorch" + self.optimizers = framework_optimizers[framework]().optimizers + + def __getitem__(self, optimizer_type): + """Return the specific type of optimizer object according to the given optimizer_type.""" + assert optimizer_type in self.optimizers.keys(), "only support optimizers in {}".format(self.optimizers.keys()) + + return self.optimizers[optimizer_type] + + def register(self, name, optimizer_cls): + """Allow registration of non-built-in optimizers.""" + assert name not in self.optimizers.keys(), "registered optimizer name already exists." + self.optimizers.update({name: optimizer_cls}) + + +def optimizer_registry(optimizer_type, framework): + """Class decorator used to register all Optimizer subclasses. + + Cross framework optimizer is supported by add param as framework='tensorflow, pytorch' + + Args: + optimizer_type (str): The string of supported criterion. + framework (str): The string of supported framework. + + Returns: + cls: The class of register. + """ + + def decorator_optimizer(cls): + for fw in [fwk.strip() for fwk in framework.split(",")]: + assert fw in ["tensorflow", "pytorch"], "The framework support tensorflow pytorch" + + if optimizer_type in registry_optimizers[fw].keys(): + raise ValueError("Cannot have two optimizers with the same name") + registry_optimizers[fw][optimizer_type] = cls + return cls + + return decorator_optimizer + + +@optimizer_registry("SGD", "tensorflow") +class TensorFlowSGD(object): + """TensorFlow keras SGD optimizer. 
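The Optimizers entry class and the optimizer_registry decorator above mirror the criterions registry. A small sketch of how a non-built-in optimizer could be attached, using a hypothetical Adadelta wrapper that is not part of this patch; note that registration has to happen before the first Optimizers(framework) lookup, because the singleton per-framework table is only populated once:

import torch

from neural_compressor.compression.distillation.optimizers import Optimizers, optimizer_registry


@optimizer_registry("Adadelta", "pytorch")  # hypothetical name, for illustration only
class PyTorchAdadelta(object):
    """Wrapper following the same contract as the built-in optimizer classes."""

    def __init__(self, param_dict):
        assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict"
        self._param_dict = param_dict

    def _mapping(self):
        # Translate the config-facing "learning_rate" key into torch's "lr".
        return {"lr": self._param_dict.get("learning_rate", 1.0)}

    def __call__(self, **kwargs):
        return torch.optim.Adadelta, self._mapping()


optimizer_cls, optimizer_kwargs = Optimizers("pytorch")["Adadelta"]({"learning_rate": 0.5})()
optimizer = optimizer_cls([torch.nn.Parameter(torch.zeros(1))], **optimizer_kwargs)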
+ + Args: + param_dict (dict): The dict of parameters setting by user for SGD optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowSGD` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"learning_rate": "learning_rate", "momentum": "momentum", "nesterov": "nesterov"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowSGD` object.""" + return tf.keras.optimizers.SGD, self._mapping(**kwargs) + + +@optimizer_registry("AdamW", "tensorflow") +class TensorFlowAdamW(object): + """tensorflow_addons AdamW optimizer. + + Args: + param_dict (dict): The dict of parameters setting by user for AdamW optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowAdamW` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = { + "learning_rate": "learning_rate", + "weight_decay": "weight_decay", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowAdamW` object.""" + return tfa.optimizers.AdamW, self._mapping(**kwargs) + + +@optimizer_registry("Adam", "tensorflow") +class TensorFlowAdam(object): + """Tensorflow Adam optimizer. + + Args: + param_dict (dict): The dict of parameters setting by user for Adam optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowAdam` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = { + "learning_rate": "learning_rate", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowAdam` object.""" + return tf.keras.optimizers.Adam, self._mapping(**kwargs) + + +@optimizer_registry("SGD", "pytorch") +class PyTorchSGD(object): + """PyTorch SGD optimizer. 
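For the built-in TensorFlow wrappers above, the keys keep their config spelling and _mapping() simply filters them, so resolving an optimizer is a two-step call. A short usage sketch with illustrative hyperparameters (AdamW resolves to tensorflow_addons, which is only lazy-imported and must be installed):

from neural_compressor.compression.distillation.optimizers import Optimizers

tf_optimizers = Optimizers("tensorflow")

# SGD: the mapped kwargs feed straight into tf.keras.optimizers.SGD.
sgd_cls, sgd_kwargs = tf_optimizers["SGD"]({"learning_rate": 0.1, "momentum": 0.9, "nesterov": True})()
sgd = sgd_cls(**sgd_kwargs)

# AdamW resolves to tfa.optimizers.AdamW, so tensorflow_addons must be available.
adamw_cls, adamw_kwargs = tf_optimizers["AdamW"]({"learning_rate": 1e-3, "weight_decay": 1e-4})()
adamw = adamw_cls(**adamw_kwargs)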
+
+    Args:
+        param_dict (dict): The dict of parameters setting by user for SGD optimizer
+    """
+
+    def __init__(self, param_dict):
+        """Initialize `PyTorchSGD` class."""
+        assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict"
+        self._param_dict = param_dict
+
+    def _mapping(self):
+        _param_map = {
+            "learning_rate": "lr",
+            "momentum": "momentum",
+            "nesterov": "nesterov",
+            "weight_decay": "weight_decay",
+        }
+        _dict = {}
+        for key in self._param_dict:
+            if key in _param_map:
+                _dict.update({_param_map[key]: self._param_dict[key]})
+        return _dict
+
+    def __call__(self, **kwargs):
+        """Call `PyTorchSGD` object."""
+        return torch.optim.SGD, self._mapping(**kwargs)
diff --git a/neural_compressor/compression/distillation/utility.py b/neural_compressor/compression/distillation/utility.py
new file mode 100644
index 00000000000..3042a564951
--- /dev/null
+++ b/neural_compressor/compression/distillation/utility.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This is a utility file for PyTorch distillation."""
+
+from neural_compressor.utils.utility import LazyImport
+
+torch = LazyImport("torch")
+
+STUDENT_FEATURES = {}
+TEACHER_FEATURES = {}
+
+
+# Wrapped for torch.fx so traced (fx) models keep record_output as a leaf call.
+@torch.fx.wrap
+def record_output(output, name, output_process, student=False):
+    """Record a layer's output.
+
+    It is a helper function.
+    """
+    recorded_output = output
+    if output_process != "":
+        if isinstance(output, dict) and output_process in output:
+            recorded_output = output[output_process]
+        elif isinstance(output, (tuple, list)) and str.isnumeric(output_process):
+            recorded_output = output[int(output_process)]
+        elif callable(output_process):
+            recorded_output = output_process(output)
+        else:
+            raise NotImplementedError(
+                "Currently only supported cases are: an integer index when the "
+                + "output is a tuple or list and only one item is needed, "
+                + "a key when the output is a dict, "
+                + "or output_process being a callable."
+ ) + if student: + STUDENT_FEATURES[name].append(recorded_output) + else: + TEACHER_FEATURES[name].append(recorded_output) + return output + + +def get_activation(name, output_process="", student=False): + """Get a hook for getting activation.""" + + def hook(model, input, output): + if model.training or not student: + return record_output(output, name, output_process, student=student) + else: + return output + + return hook diff --git a/neural_compressor/experimental/common/criterion.py b/neural_compressor/experimental/common/criterion.py index 0007c34c43f..1af15a73895 100644 --- a/neural_compressor/experimental/common/criterion.py +++ b/neural_compressor/experimental/common/criterion.py @@ -131,119 +131,6 @@ def decorator_criterion(cls): return decorator_criterion -@criterion_registry("CrossEntropyLoss", "tensorflow") -class TensorFlowCrossEntropyLoss(object): - """TensorFlow CrossEntropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the Datasets class. - - Args: - param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. - """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction", "from_logits": "from_logits"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "auto", - "none", - "sum", - "sum_over_batch_size", - ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the TensorFlowCrossEntropyLoss. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return tf.keras.losses.CategoricalCrossentropy, self._mapping() - - -@criterion_registry("SparseCategoricalCrossentropy", "tensorflow") -class TensorFlowSparseCategoricalCrossentropy(object): - """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the Datasets class. - - Args: - param_dict (string): param_dict. - """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction", "from_logits": "from_logits"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "auto", - "none", - "sum", - "sum_over_batch_size", - ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the TensorFlowSparseCategoricalCrossentropy. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return tf.keras.losses.SparseCategoricalCrossentropy, self._mapping() - - -@criterion_registry("CrossEntropyLoss", "pytorch") -class PyTorchCrossEntropyLoss(object): - """PyTorch CrossEntropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the PyTorchCrossEntropyLoss class. - - Args: - param_dict (string): param_dict. 
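The relocated get_activation/record_output helpers are what PyTorchIntermediateLayersKnowledgeDistillationLoss wires up in register_hooks_for_models (see the criterions.py hunk earlier in this diff). A self-contained sketch of the same mechanism on a toy module; the layer name, the toy model, and the manual STUDENT_FEATURES setup are illustrative stand-ins for what the loss class does internally:

import torch

from neural_compressor.compression.distillation import utility

student = torch.nn.Sequential()
student.add_module("fc", torch.nn.Linear(8, 4))
student.train()  # the student-side hook only records while the model is in training mode

# register_hooks_for_models normally assigns these dicts; done by hand here.
utility.STUDENT_FEATURES = {"fc": []}

handle = student.fc.register_forward_hook(utility.get_activation("fc", student=True))
student(torch.randn(2, 8))
assert len(utility.STUDENT_FEATURES["fc"]) == 1  # one recorded activation per forward pass
handle.remove()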
- """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "none", - "mean", - "sum", - ], "Supported reduction value is none, mean, sum" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the PyTorchCrossEntropyLoss. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return torch.nn.CrossEntropyLoss, self._mapping() - - class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -576,177 +463,6 @@ def __call__(self, **kwargs): return PyTorchKnowledgeDistillationLoss, self._param_check() -class TensorflowKnowledgeDistillationLoss(KnowledgeDistillationLoss): - """The TensorflowKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - - def __init__( - self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None - ): - """Initialize Tensorflow Knowledge Distillation Loss class. - - Args: - temperature (float, optional): Hyperparameters that control the entropy - of probability distributions. Defaults to 1.0. - loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. - loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. - student_model (optional): student model. Defaults to None. - teacher_model (optional): teacher model. Defaults to None. - - Raises: - NotImplementedError: NotImplementedError - NotImplementedError: NotImplementedError - """ - super(TensorflowKnowledgeDistillationLoss, self).__init__( - temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model, - ) - if self.student_targets_loss is None: - if self.loss_types[0] == "CE": - self.student_targets_loss = tf.keras.losses.SparseCategoricalCrossentropy() - else: - raise NotImplementedError( - "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." - ) - logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) - if self.teacher_student_loss is None: - if self.loss_types[1] == "CE": - self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == "KL": - self.teacher_student_loss = tf.keras.losses.KLDivergence() - else: - raise NotImplementedError( - "Now we only support CrossEntropyLoss" - " for loss of student model output with respect to teacher model output." - ) - logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) - - def SoftCrossEntropy(self, targets, logits): - """Return SoftCrossEntropy. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: SoftCrossEntropy - """ - log_prob = tf.math.log(logits) - targets_prob = targets - return tf.math.reduce_mean(tf.math.reduce_sum(-targets_prob * log_prob, axis=-1), axis=-1) - - def teacher_model_forward(self, input, teacher_model=None): - """Teacher model forward. - - Args: - input (tensor): input data - teacher_model (optional): teacher model. Defaults to None. - device (torch.device, optional): device. Defaults to None. 
- - Returns: - tensor: output - """ - outputs = None - if self.loss_weights[1] > 0 and input is not None: - model = self.teacher_model if teacher_model is None else teacher_model - if isinstance(input, list) or isinstance(input, tuple): # pragma: no cover - outputs = model(*input, training=True) - elif isinstance(input, dict): # pragma: no cover - outputs = model(**input, training=True) - else: - outputs = model(input, training=True) - self.teacher_outputs = outputs - return outputs - - def teacher_student_loss_cal(self, student_outputs, teacher_outputs): - """Calculate loss between student model and teacher model. - - Args: - student_outputs (tensor): student outputs - teacher_outputs (tensor): teacher outputs - - Returns: - tensor: loss - """ - assert self.teacher_student_loss, "teacher_student_loss not specified." - return self.teacher_student_loss(teacher_outputs, student_outputs) - - def student_targets_loss_cal(self, student_outputs, targets): - """Calculate loss of student model. - - Args: - student_outputs (tensor): student outputs - targets (tensor): groud truth label - - Returns: - tensor: loss - """ - assert self.student_targets_loss, "student_targets_loss not specified." - return self.student_targets_loss(targets, student_outputs) - - def __call__(self, student_outputs, targets): - """Return loss of student model. - - Args: - student_outputs (tensor): student outputs - targets (tensor): groud truth label - - Returns: - tensor: loss - """ - tmp = student_outputs - student_outputs = targets - targets = tmp - return self.loss_cal(student_outputs, targets) - - -@criterion_registry("KnowledgeDistillationLoss", "tensorflow") -class TensorflowKnowledgeDistillationLossWrapper(object): - """TensorflowKnowledgeDistillationLossWrapper wraps TensorflowKnowledgeDistillationLoss.""" - - def __init__(self, param_dict): - """Initialize Tensorflow Knowledge Distillation Loss Wrapper. - - Args: - param_dict (dict): parameter dict - """ - self.param_dict = param_dict - - def _param_check(self): - param_dict = self.param_dict - _params = ["temperature", "loss_types", "loss_weights"] - assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) - assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." - assert len(param_dict["loss_types"]) == len( - param_dict["loss_weights"] - ), "Length of loss_types and loss_weights must be the same." - assert all( - type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] - ), "Type of loss_types and loss_weights must be list or tuple." - assert all( - any(isinstance(e, t) for t in [str, tf.keras]) for e in param_dict["loss_types"] - ), "Type of loss_types element must be str or torch Module." - assert ( - all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) - and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 - ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." - new_dict = {} - for k in _params: - new_dict[k] = param_dict[k] - return new_dict - - def __call__(self, **kwargs): - """Return TensorflowKnowledgeDistillationLoss, param dict. 
- - Returns: - class: TensorflowKnowledgeDistillationLoss - param dict (dict): param dict - """ - return TensorflowKnowledgeDistillationLoss, self._param_check() - - class TensorflowKnowledgeDistillationLossExternal(KnowledgeDistillationLoss): """TensorflowKnowledgeDistillationLossExternal inherits from KnowledgeDistillationLoss.""" @@ -1399,224 +1115,3 @@ def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): def __call__(self, student_outputs, targets): """Return 0.""" return 0 - - -class PyTorchSelfKnowledgeDistillationLoss(SelfKnowledgeDistillationLoss): - """PyTorch SelfKnowledge Distillation Loss.""" - - def __init__( - self, - layer_mappings=[], - loss_types=None, - loss_weights=None, - temperature=1.0, - add_origin_loss=False, - student_model=None, - teacher_model=None, - ): - """Initialize PyTorch SelfKnowledge Distillation Loss class. - - Args: - layer_mappings (list): layers of distillation.Format like - [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]], - [[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]] - loss_types (list, optional): loss types. Defaults to ['CE'] * len(layer_mappings). - loss_weights (list, optional): loss weights. Defaults to [1.0 / len(layer_mappings)] * - len(layer_mappings).temperature (float, optional): use to calculate the soft label CE. - temperature (optional): temperature. Defaults to 1.0. - add_origin_loss (bool, optional): whether to add origin loss for hard label loss. - student_model (optional): student model. Defaults to None. - teacher_model (optional): teacher model. Defaults to None. - """ - super(PyTorchSelfKnowledgeDistillationLoss, self).__init__( - layer_mappings=layer_mappings, - loss_types=loss_types, - loss_weights=loss_weights, - temperature=temperature, - add_origin_loss=add_origin_loss, - student_model=student_model, - teacher_model=teacher_model, - ) - - def SoftCrossEntropy(self, logits, targets): - """Return SoftCrossEntropy. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: SoftCrossEntropy - """ - log_prob = torch.nn.functional.log_softmax(logits, dim=-1) - targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return (-targets_prob * log_prob).sum(dim=-1).mean() - - def KullbackLeiblerDivergence(self, logits, targets): - """Return KullbackLeiblerDivergence. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: KullbackLeiblerDivergence - """ - log_prob = torch.nn.functional.log_softmax(logits, dim=-1) - targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return torch.nn.functional.kl_div(log_prob, targets_prob) - - def L2Divergence(self, feature1, feature2): - """Return L2Divergence. - - Args: - feature1 (tensor): feature1 value - feature2 (tensor): feature2 value - - Returns: - tensor: L2Divergence between feature1 and feature2 - """ - return torch.dist(feature1, feature2) - - def init_loss_funcs(self): - """Init loss funcs.""" - for loss_type in self.loss_types: - if loss_type == "CE": - loss_func = self.SoftCrossEntropy - elif loss_type == "KL": - loss_func = self.KullbackLeiblerDivergence - elif loss_type == "L2": - loss_func = self.L2Divergence - else: - raise NotImplementedError( - f"Unsupported loss type {loss_type}, supported loss is " - "CE for software CE, KL for Kullback-Leibler divergence and " - "L2 for L2 distance." 
- ) - self.loss_funcs.append(loss_func) - - def loss_cal(self, student_outputs): - """Calculate loss of student model. - - Args: - student_outputs (dict): student outputs - - Returns: - tensor: loss - """ - self.loss = torch.FloatTensor([0.0]) - tmp_loss = 0 - temperature = self.temperature - for loss_idx in range(len(self.layer_mappings)): - items = self.layer_mappings[loss_idx] - for idx in range(len(items)): - student_layer, teacher_layer = items[idx] - student_feature = student_outputs[student_layer] - teacher_feature = student_outputs[teacher_layer] - if loss_idx == 1: # soft logit - tmp_loss += ( - self.loss_funcs[loss_idx](student_feature / temperature, teacher_feature / temperature) - * self.loss_weights[loss_idx] - ) - else: # feature learning - tmp_loss += ( - self.loss_funcs[loss_idx](student_feature, teacher_feature) * self.loss_weights[loss_idx] - ) - if tmp_loss.device != self.loss.device: - self.loss = self.loss.to(tmp_loss.device) - self.loss += tmp_loss - return self.loss - - def teacher_model_forward(self, input, teacher_model=None, device=None): - """Teacher model forward. - - Args: - input (tensor): input data - teacher_model (torch.nn.model, optional): teacher model. Defaults to None. - device (torch.device, optional): device. Defaults to None. - - Returns: - tensor: output - """ - outputs = None - if self.loss_weights[1] > 0: - model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( - type(model) - ) - model.eval() - try: - model_device = next(model.parameters()).device - except: - logger.warning("Cannot get model device, assuming it's in CPU.") - model_device = "cpu" - device = model_device if device is None else device - if device != model_device: - model.to(device) - with torch.no_grad(): - outputs = pytorch_forward_wrapper(model, input, device=device) - self.teacher_outputs = outputs - return outputs - - -@criterion_registry("SelfKnowledgeDistillationLoss", "pytorch") -class PyTorchSelfKnowledgeDistillationLossWrapper(object): - """PyTorch SelfKnowledge Distillation Loss Wrapper.""" - - def __init__(self, param_dict): - """Initialize PyTorchSelfKnowledgeDistillationLossWrapper class. - - Args: - param_dict (dict): param dict - """ - self.param_dict = param_dict - - def _param_check(self): - param_dict = self.param_dict - _params = ["temperature", "layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] - layer_mappings = param_dict["layer_mappings"] - if "loss_types" not in param_dict: - param_dict["loss_types"] = ["CE"] * len(layer_mappings) - if "loss_weights" not in param_dict: - param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) - if "add_origin_loss" not in param_dict: - param_dict["add_origin_loss"] = False - if "temperature" not in param_dict: - param_dict["temperature"] = 1.0 - assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." - assert all( - type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] - ), "Type of loss_types and loss_weights must be list or tuple." - assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." - assert ( - len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) - ), "Length of layer_mappings, loss_types and loss_weights must be the same." 
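Taken together, the checks in _param_check above (and the layer_mappings format documented in the class docstring) accept a parameter dict shaped like the following; the layer names are purely illustrative. Since this wrapper is being dropped from the experimental namespace by this patch, the sketch only records the accepted format:

self_distillation_params = {
    "temperature": 3.0,          # must be a positive value
    "add_origin_loss": False,    # bool; defaults to False when omitted
    # One entry per distillation stage; every inner pair is
    # [student_layer_output, teacher_layer_output].
    "layer_mappings": [
        [["resblock.1.feature.output", "resblock.deepst.feature.output"]],
        [["resblock.2.feature.output", "resblock.deepst.feature.output"]],
    ],
    "loss_types": ["L2", "KL"],  # CE, KL or L2; same length as layer_mappings
    "loss_weights": [0.5, 0.5],  # each element in [0, 1]
}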
- assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." - for items in param_dict["layer_mappings"]: - assert all(type(it) in [list, tuple] and (len(it) == 2) for it in items), ( - "Elements of layer_mappings must be list or tuple and with length of 2." - + "element looks like ['resblock.1.feature.output," - + "'resblock.deepst.feature.output'], where " - + "'resblock.1.feature.output' and 'resblock.deepst.feature.output' " - + "represent resblock feature output of the student model and feature output of the" - + "teacher model respectively." - ) - assert all( - any(isinstance(e, t) for t in [str]) for e in param_dict["loss_types"] - ), "Type of loss_types element must be str." - assert all( - 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] - ), "Element of loss_weights must be in interval [0, 1]." - new_dict = {} - for k in _params: - new_dict[k] = param_dict[k] - return new_dict - - def __call__(self, **kwargs): - """Return PyTorchSelfKnowledgeDistillationLoss, param dict. - - Returns: - class: PyTorchSelfKnowledgeDistillationLoss - param dict (dict): param dict - """ - return PyTorchSelfKnowledgeDistillationLoss, self._param_check() diff --git a/neural_compressor/experimental/common/optimizer.py b/neural_compressor/experimental/common/optimizer.py index 877c02ed3dc..2de24629cc7 100644 --- a/neural_compressor/experimental/common/optimizer.py +++ b/neural_compressor/experimental/common/optimizer.py @@ -167,38 +167,6 @@ def __call__(self, **kwargs): return tfa.optimizers.AdamW, self._mapping(**kwargs) -@optimizer_registry("Adam", "tensorflow") -class TensorFlowAdam(object): - """Tensorflow Adam optimizer. - - Args: - param_dict (dict): The dict of parameters setting by user for Adam optimizer - """ - - def __init__(self, param_dict): - """Initialize `TensorFlowAdam` class.""" - assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = { - "learning_rate": "learning_rate", - "beta_1": "beta_1", - "beta_2": "beta_2", - "epsilon": "epsilon", - "amsgrad": "amsgrad", - } - _dict = {} - for key in self._param_dict: - if key in _param_map: - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self, **kwargs): - """Call `TensorFlowAdam` object.""" - return tf.keras.optimizers.Adam, self._mapping(**kwargs) - - @optimizer_registry("SGD", "pytorch") class PyTorchSGD(object): """PyTorch SGD optimizer. 
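For downstream code, the net effect of the hunks above and below is a set of import-path moves out of the experimental namespace. A quick summary of the 2.x locations used throughout this patch (the commented lines are the 1.x paths that are no longer used):

# 1.x experimental paths no longer used after this patch:
#   from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLoss
#   from neural_compressor.experimental.common import Criterions, Optimizers
#   from neural_compressor.experimental.common import torch_utils
#   from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader

# 2.x replacements:
from neural_compressor.compression.distillation.criterions import Criterions, TensorflowKnowledgeDistillationLoss
from neural_compressor.compression.distillation.optimizers import Optimizers
from neural_compressor.compression.distillation import utility
from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader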
diff --git a/neural_compressor/profiling/profiler/factory.py b/neural_compressor/profiling/profiler/factory.py index 2273e0375e0..566de6cabb6 100644 --- a/neural_compressor/profiling/profiler/factory.py +++ b/neural_compressor/profiling/profiler/factory.py @@ -15,7 +15,7 @@ """Profiling class factory.""" from typing import Optional -from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader from neural_compressor.model import BaseModel from neural_compressor.profiling.profiler.onnxrt_profiler.factory import ProfilerFactory as OnnxrtProfilerFactory from neural_compressor.profiling.profiler.profiler import Profiler diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py index 0317c557f6c..0baa6e35756 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py @@ -16,7 +16,7 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader +from neural_compressor.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.profiling.profiler.onnxrt_profiler.profiler import Profiler diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py index 3ec6c866d95..a248b73927c 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader +from neural_compressor.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.profiling.profiler.onnxrt_profiler.utils import create_onnx_config from neural_compressor.profiling.profiler.profiler import Profiler as Parent diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py index 888e66eb53c..245bf307339 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py @@ -16,7 +16,7 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler from neural_compressor.profiling.profiler.tensorflow_profiler.profiler import Profiler as FrozenPbProfiler diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py index 06802d05b89..d5564a75c40 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import 
TensorflowDataLoader +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler as Parent diff --git a/neural_compressor/utils/create_obj_from_config.py b/neural_compressor/utils/create_obj_from_config.py index 6db34e3bf55..fa1ddafca95 100644 --- a/neural_compressor/utils/create_obj_from_config.py +++ b/neural_compressor/utils/create_obj_from_config.py @@ -189,7 +189,8 @@ def create_train_func(framework, dataloader, adaptor, train_cfg, hooks=None, cal assert dataloader, "dataloader should NOT be empty when train_func is None" assert adaptor, "adaptor should NOT be empty" - from neural_compressor.experimental.common import Criterions, Optimizers + from neural_compressor.compression.distillation.criterions import Criterions + from neural_compressor.compression.distillation.optimizers import Optimizers postprocess_cfg = train_cfg.postprocess if postprocess_cfg is not None: diff --git a/test/distillation/test_distillation_1.x.py b/test/distillation/test_distillation_1.x.py index d754b6dea88..802f81148a5 100644 --- a/test/distillation/test_distillation_1.x.py +++ b/test/distillation/test_distillation_1.x.py @@ -261,19 +261,6 @@ def test_distillation_external_new_API(self): stat = torch.load("./saved/best_model.pt") opt_model = self.student_model.load_state_dict(stat) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.3.0"), " keras requires higher version than tf-2.3.0") - def test_tf_distillation(self): - from neural_compressor.conf.config import DistillationConf - from neural_compressor.experimental import Distillation - - conf = DistillationConf("fake_1.yaml") - distiller = Distillation(conf) - distiller = Distillation("fake_1.yaml") - distiller.student_model = self.student_model_tf - distiller.teacher_model = self.teacher_model_tf - print("student model: {}".format(distiller.student_model)) - _ = distiller.fit() - if __name__ == "__main__": unittest.main()
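The deleted test_tf_distillation above exercised the 1.x DistillationConf/Distillation API that this patch is moving away from. For reference, the 2.x replacement goes through prepare_compression with a DistillationConfig; the sketch below is adapted from the documented 2.x PyTorch flow and should be treated as an assumption to verify against the 2.x docs rather than as part of this diff (a TensorFlow equivalent is not established here):

import torch

from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
from neural_compressor.training import prepare_compression

# Toy student/teacher pair and synthetic data, for illustration only.
student = torch.nn.Sequential(torch.nn.Linear(8, 4))
teacher = torch.nn.Sequential(torch.nn.Linear(8, 4))
data = [(torch.randn(2, 8), torch.randint(0, 4, (2,))) for _ in range(4)]

criterion_conf = KnowledgeDistillationLossConfig(temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5])
conf = DistillationConfig(teacher_model=teacher, criterion=criterion_conf)
compression_manager = prepare_compression(student, conf)

optimizer = torch.optim.SGD(student.parameters(), lr=0.01)
hard_loss = torch.nn.CrossEntropyLoss()

compression_manager.callbacks.on_train_begin()
for x, y in data:
    optimizer.zero_grad()
    outputs = student(x)
    loss = hard_loss(outputs, y)
    # The callback folds the teacher/student distillation loss into the hard loss.
    loss = compression_manager.callbacks.on_after_compute_loss(x, outputs, loss)
    loss.backward()
    optimizer.step()
compression_manager.callbacks.on_train_end()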