diff --git a/neural_compressor/adaptor/tensorflow.py b/neural_compressor/adaptor/tensorflow.py index 094795b37b7..b28cf65175f 100644 --- a/neural_compressor/adaptor/tensorflow.py +++ b/neural_compressor/adaptor/tensorflow.py @@ -190,7 +190,7 @@ def train(self, model, dataloader, optimizer_tuple, criterion_tuple, hooks, post callbacks = kwargs["kwargs"].get("callbacks", None) execution_mode = kwargs["kwargs"].get("execution_mode", None) distributed = getattr(dataloader, "distributed", False) - from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLoss + from neural_compressor.compression.distillation.criterions import TensorflowKnowledgeDistillationLoss if isinstance(criterion, TensorflowKnowledgeDistillationLoss): input_model = model._model @@ -1757,8 +1757,8 @@ def _get_mse_order( def _partial_dataset_of(self, dataloader, confidence_batches): """Partial dataset.""" + from neural_compressor.data.datasets.dummy_dataset import DummyDataset from neural_compressor.data.datasets.dummy_dataset import DummyDataset as DummyDataset_v2_x - from neural_compressor.experimental.data.datasets.dummy_dataset import DummyDataset if isinstance(dataloader.dataset, DummyDataset) or isinstance(dataloader.dataset, DummyDataset_v2_x): assert isinstance(confidence_batches, int) diff --git a/neural_compressor/compression/distillation/criterions.py b/neural_compressor/compression/distillation/criterions.py index 745260699a2..7c9b4896ae4 100644 --- a/neural_compressor/compression/distillation/criterions.py +++ b/neural_compressor/compression/distillation/criterions.py @@ -17,6 +17,8 @@ """Initialize critetion classes. Classes includes: + TensorFlowCrossEntropyLoss, PyTorchCrossEntropyLoss, + TensorFlowSparseCategoricalCrossentropy, TensorflowKnowledgeDistillationLoss, PyTorchKnowledgeDistillationLoss, PyTorchIntermediateLayersKnowledgeDistillationLoss. """ @@ -91,7 +93,12 @@ def __getitem__(self, criterion_type): Returns: cls: criterion class. """ - assert criterion_type in self.criterions.keys(), "only support criterions in {}".format(self.criterions.keys()) + assert ( + criterion_type in self.criterions.keys() + ), "only support criterions in {} \ + , but got criterion type {}".format( + self.criterions.keys(), criterion_type + ) return self.criterions[criterion_type] @@ -130,6 +137,119 @@ def decorator_criterion(cls): return decorator_criterion +@criterion_registry("CrossEntropyLoss", "tensorflow") +class TensorFlowCrossEntropyLoss(object): + """TensorFlow CrossEntropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the Datasets class. + + Args: + param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the TensorFlowCrossEntropyLoss. + + Returns: + cls: criterion class. 
+ param_dict(dict): param_dict + """ + return tf.keras.losses.CategoricalCrossentropy, self._mapping() + + +@criterion_registry("SparseCategoricalCrossentropy", "tensorflow") +class TensorFlowSparseCategoricalCrossentropy(object): + """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the Datasets class. + + Args: + param_dict (string): param_dict. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction", "from_logits": "from_logits"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "auto", + "none", + "sum", + "sum_over_batch_size", + ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the TensorFlowSparseCategoricalCrossentropy. + + Returns: + cls: criterion class. + param_dict(dict): param_dict + """ + return tf.keras.losses.SparseCategoricalCrossentropy, self._mapping() + + +@criterion_registry("CrossEntropyLoss", "pytorch") +class PyTorchCrossEntropyLoss(object): + """PyTorch CrossEntropyLoss criterion.""" + + def __init__(self, param_dict): + """Initialize the PyTorchCrossEntropyLoss class. + + Args: + param_dict (string): param_dict. + """ + assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"reduction": "reduction"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + if key == "reduction": + assert self._param_dict[key] in [ + "none", + "mean", + "sum", + ], "Supported reduction value is none, mean, sum" + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self): + """Call the PyTorchCrossEntropyLoss. + + Returns: + cls: criterion class. 
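The criterion wrappers added above all follow one contract: they are constructed with the user's param_dict, and calling them returns the framework loss class together with the keyword arguments produced by _mapping(). A minimal sketch of how a registered criterion is resolved, assuming the 2.x Criterions entry class keeps the framework-keyed lookup shown in __getitem__ (the parameter values below are illustrative, not part of this patch):

from neural_compressor.compression.distillation.criterions import Criterions

# Resolve the registered wrapper for a framework/name pair; an unknown name now
# fails with the more informative assertion message introduced in __getitem__.
wrapper_cls = Criterions("pytorch")["CrossEntropyLoss"]

# The wrapper validates the param_dict and translates it into framework kwargs.
loss_cls, loss_kwargs = wrapper_cls({"reduction": "mean"})()
criterion = loss_cls(**loss_kwargs)  # torch.nn.CrossEntropyLoss(reduction="mean")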
+ param_dict(dict): param_dict + """ + return torch.nn.CrossEntropyLoss, self._mapping() + + class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -916,7 +1036,7 @@ def register_hooks_for_models(self): Raises: AttributeError: AttributeError """ - from neural_compressor.experimental.common import torch_utils + from neural_compressor.compression.distillation import utility def register_model_forward_hook(model, path, output_process="", student=False): module = model @@ -927,7 +1047,7 @@ def register_model_forward_hook(model, path, output_process="", student=False): module = module.__getattr__(node) except: raise AttributeError("There is no path {} in the model.".format(path)) - return module.register_forward_hook(torch_utils.get_activation(path, output_process, student)) + return module.register_forward_hook(utility.get_activation(path, output_process, student)) assert isinstance(self.student_model, torch.nn.Module) and isinstance(self.teacher_model, torch.nn.Module), ( "Expect student_model and teacher_model to be an torch.nn.Module object, " @@ -939,8 +1059,8 @@ def register_model_forward_hook(model, path, output_process="", student=False): student_output_process, teacher_output_process = self.layer_output_process[idx] st_handle = register_model_forward_hook(self.student_model, student_layer, student_output_process, True) te_handle = register_model_forward_hook(self.teacher_model, teacher_layer, teacher_output_process) - torch_utils.STUDENT_FEATURES = self.student_features - torch_utils.TEACHER_FEATURES = self.teacher_features + utility.STUDENT_FEATURES = self.student_features + utility.TEACHER_FEATURES = self.teacher_features self.hook_handles.extend([st_handle, te_handle]) def remove_all_hooks(self): diff --git a/neural_compressor/compression/distillation/optimizers.py b/neural_compressor/compression/distillation/optimizers.py new file mode 100644 index 00000000000..09475832497 --- /dev/null +++ b/neural_compressor/compression/distillation/optimizers.py @@ -0,0 +1,230 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (c) 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Intel Neural Compressor built-in Optimizers on multiple framework backends.""" + +from abc import abstractmethod + +from neural_compressor.utils.utility import LazyImport, singleton + +torch = LazyImport("torch") +tf = LazyImport("tensorflow") +tfa = LazyImport("tensorflow_addons") + + +@singleton +class TensorflowOptimizers(object): + """Class to get all registered TensorFlow Optimizers once only.""" + + def __init__(self): + """Initialize `TensorflowOptimizers` class once only.""" + self.optimizers = {} + self.optimizers.update(TENSORFLOW_OPTIMIZERS) + + +@singleton +class PyTorchOptimizers(object): + """Class to get all registered PyTorch Optimizers once only.""" + + def __init__(self): + """Initialize `PyTorchOptimizers` class once only.""" + self.optimizers = {} + self.optimizers.update(PYTORCH_OPTIMIZERS) + + +framework_optimizers = { + "tensorflow": TensorflowOptimizers, + "pytorch": PyTorchOptimizers, + "pytorch_fx": PyTorchOptimizers, +} + +# user/model specific optimizers will be registered here +TENSORFLOW_OPTIMIZERS = {} +PYTORCH_OPTIMIZERS = {} + +registry_optimizers = { + "tensorflow": TENSORFLOW_OPTIMIZERS, + "pytorch": PYTORCH_OPTIMIZERS, + "pytorch_fx": PYTORCH_OPTIMIZERS, +} + + +class Optimizers(object): + """Main entry to get the specific type of optimizer.""" + + def __init__(self, framework): + """Initialize `Optimizers` class.""" + assert framework in ("tensorflow", "pytorch", "pytorch_fx"), "framework support tensorflow pytorch" + self.optimizers = framework_optimizers[framework]().optimizers + + def __getitem__(self, optimizer_type): + """Return the specific type of optimizer object according to the given optimizer_type.""" + assert optimizer_type in self.optimizers.keys(), "only support optimizers in {}".format(self.optimizers.keys()) + + return self.optimizers[optimizer_type] + + def register(self, name, optimizer_cls): + """Allow registration of non-built-in optimizers.""" + assert name not in self.optimizers.keys(), "registered optimizer name already exists." + self.optimizers.update({name: optimizer_cls}) + + +def optimizer_registry(optimizer_type, framework): + """Class decorator used to register all Optimizer subclasses. + + Cross framework optimizer is supported by add param as framework='tensorflow, pytorch' + + Args: + optimizer_type (str): The string of supported criterion. + framework (str): The string of supported framework. + + Returns: + cls: The class of register. + """ + + def decorator_optimizer(cls): + for fw in [fwk.strip() for fwk in framework.split(",")]: + assert fw in ["tensorflow", "pytorch"], "The framework support tensorflow pytorch" + + if optimizer_type in registry_optimizers[fw].keys(): + raise ValueError("Cannot have two optimizers with the same name") + registry_optimizers[fw][optimizer_type] = cls + return cls + + return decorator_optimizer + + +@optimizer_registry("SGD", "tensorflow") +class TensorFlowSGD(object): + """TensorFlow keras SGD optimizer. 
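The Optimizers entry class and the optimizer_registry decorator above mirror the criterions registry. A small sketch of how a non-built-in optimizer could be attached, using a hypothetical Adadelta wrapper that is not part of this patch; note that registration has to happen before the first Optimizers(framework) lookup, because the singleton per-framework table is only populated once:

import torch

from neural_compressor.compression.distillation.optimizers import Optimizers, optimizer_registry


@optimizer_registry("Adadelta", "pytorch")  # hypothetical name, for illustration only
class PyTorchAdadelta(object):
    """Wrapper following the same contract as the built-in optimizer classes."""

    def __init__(self, param_dict):
        assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict"
        self._param_dict = param_dict

    def _mapping(self):
        # Translate the config-facing "learning_rate" key into torch's "lr".
        return {"lr": self._param_dict.get("learning_rate", 1.0)}

    def __call__(self, **kwargs):
        return torch.optim.Adadelta, self._mapping()


optimizer_cls, optimizer_kwargs = Optimizers("pytorch")["Adadelta"]({"learning_rate": 0.5})()
optimizer = optimizer_cls([torch.nn.Parameter(torch.zeros(1))], **optimizer_kwargs)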
+ + Args: + param_dict (dict): The dict of parameters setting by user for SGD optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowSGD` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = {"learning_rate": "learning_rate", "momentum": "momentum", "nesterov": "nesterov"} + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowSGD` object.""" + return tf.keras.optimizers.SGD, self._mapping(**kwargs) + + +@optimizer_registry("AdamW", "tensorflow") +class TensorFlowAdamW(object): + """tensorflow_addons AdamW optimizer. + + Args: + param_dict (dict): The dict of parameters setting by user for AdamW optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowAdamW` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = { + "learning_rate": "learning_rate", + "weight_decay": "weight_decay", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowAdamW` object.""" + return tfa.optimizers.AdamW, self._mapping(**kwargs) + + +@optimizer_registry("Adam", "tensorflow") +class TensorFlowAdam(object): + """Tensorflow Adam optimizer. + + Args: + param_dict (dict): The dict of parameters setting by user for Adam optimizer + """ + + def __init__(self, param_dict): + """Initialize `TensorFlowAdam` class.""" + assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" + self._param_dict = param_dict + + def _mapping(self): + _param_map = { + "learning_rate": "learning_rate", + "beta_1": "beta_1", + "beta_2": "beta_2", + "epsilon": "epsilon", + "amsgrad": "amsgrad", + } + _dict = {} + for key in self._param_dict: + if key in _param_map: + _dict.update({_param_map[key]: self._param_dict[key]}) + return _dict + + def __call__(self, **kwargs): + """Call `TensorFlowAdam` object.""" + return tf.keras.optimizers.Adam, self._mapping(**kwargs) + + +@optimizer_registry("SGD", "pytorch") +class PyTorchSGD(object): + """PyTorch SGD optimizer. 
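For the built-in TensorFlow wrappers above, the keys keep their config spelling and _mapping() simply filters them, so resolving an optimizer is a two-step call. A short usage sketch with illustrative hyperparameters (AdamW resolves to tensorflow_addons, which is only lazy-imported and must be installed):

from neural_compressor.compression.distillation.optimizers import Optimizers

tf_optimizers = Optimizers("tensorflow")

# SGD: the mapped kwargs feed straight into tf.keras.optimizers.SGD.
sgd_cls, sgd_kwargs = tf_optimizers["SGD"]({"learning_rate": 0.1, "momentum": 0.9, "nesterov": True})()
sgd = sgd_cls(**sgd_kwargs)

# AdamW resolves to tfa.optimizers.AdamW, so tensorflow_addons must be available.
adamw_cls, adamw_kwargs = tf_optimizers["AdamW"]({"learning_rate": 1e-3, "weight_decay": 1e-4})()
adamw = adamw_cls(**adamw_kwargs)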
+
+    Args:
+        param_dict (dict): The dict of parameters setting by user for SGD optimizer
+    """
+
+    def __init__(self, param_dict):
+        """Initialize `PyTorchSGD` class."""
+        assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict"
+        self._param_dict = param_dict
+
+    def _mapping(self):
+        _param_map = {
+            "learning_rate": "lr",
+            "momentum": "momentum",
+            "nesterov": "nesterov",
+            "weight_decay": "weight_decay",
+        }
+        _dict = {}
+        for key in self._param_dict:
+            if key in _param_map:
+                _dict.update({_param_map[key]: self._param_dict[key]})
+        return _dict
+
+    def __call__(self, **kwargs):
+        """Call `PyTorchSGD` object."""
+        return torch.optim.SGD, self._mapping(**kwargs)
diff --git a/neural_compressor/compression/distillation/utility.py b/neural_compressor/compression/distillation/utility.py
new file mode 100644
index 00000000000..3042a564951
--- /dev/null
+++ b/neural_compressor/compression/distillation/utility.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2022 Intel Corporation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This is a utility file for PyTorch distillation."""
+
+from neural_compressor.utils.utility import LazyImport
+
+torch = LazyImport("torch")
+
+STUDENT_FEATURES = {}
+TEACHER_FEATURES = {}
+
+
+# Wrapped for torch.fx so traced (fx) models keep record_output as a leaf call.
+@torch.fx.wrap
+def record_output(output, name, output_process, student=False):
+    """Record a layer's output.
+
+    It is a helper function.
+    """
+    recorded_output = output
+    if output_process != "":
+        if isinstance(output, dict) and output_process in output:
+            recorded_output = output[output_process]
+        elif isinstance(output, (tuple, list)) and str.isnumeric(output_process):
+            recorded_output = output[int(output_process)]
+        elif callable(output_process):
+            recorded_output = output_process(output)
+        else:
+            raise NotImplementedError(
+                "Currently only supported cases are: an integer index when the "
+                + "output is a tuple or list and only one item is needed, "
+                + "a key when the output is a dict, "
+                + "or output_process being a callable."
+ ) + if student: + STUDENT_FEATURES[name].append(recorded_output) + else: + TEACHER_FEATURES[name].append(recorded_output) + return output + + +def get_activation(name, output_process="", student=False): + """Get a hook for getting activation.""" + + def hook(model, input, output): + if model.training or not student: + return record_output(output, name, output_process, student=student) + else: + return output + + return hook diff --git a/neural_compressor/experimental/common/criterion.py b/neural_compressor/experimental/common/criterion.py index 0007c34c43f..1af15a73895 100644 --- a/neural_compressor/experimental/common/criterion.py +++ b/neural_compressor/experimental/common/criterion.py @@ -131,119 +131,6 @@ def decorator_criterion(cls): return decorator_criterion -@criterion_registry("CrossEntropyLoss", "tensorflow") -class TensorFlowCrossEntropyLoss(object): - """TensorFlow CrossEntropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the Datasets class. - - Args: - param_dict (dict): The dict of parameters setting by user for CrossEntropyLoss criterion. - """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction", "from_logits": "from_logits"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "auto", - "none", - "sum", - "sum_over_batch_size", - ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the TensorFlowCrossEntropyLoss. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return tf.keras.losses.CategoricalCrossentropy, self._mapping() - - -@criterion_registry("SparseCategoricalCrossentropy", "tensorflow") -class TensorFlowSparseCategoricalCrossentropy(object): - """TensorFlow SparseCategoricalCrossentropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the Datasets class. - - Args: - param_dict (string): param_dict. - """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction", "from_logits": "from_logits"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "auto", - "none", - "sum", - "sum_over_batch_size", - ], "Supported reduction value for tensorflow is auto, none, sum, sum_over_batch_size" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the TensorFlowSparseCategoricalCrossentropy. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return tf.keras.losses.SparseCategoricalCrossentropy, self._mapping() - - -@criterion_registry("CrossEntropyLoss", "pytorch") -class PyTorchCrossEntropyLoss(object): - """PyTorch CrossEntropyLoss criterion.""" - - def __init__(self, param_dict): - """Initialize the PyTorchCrossEntropyLoss class. - - Args: - param_dict (string): param_dict. 
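The relocated get_activation/record_output helpers are what PyTorchIntermediateLayersKnowledgeDistillationLoss wires up in register_hooks_for_models (see the criterions.py hunk earlier in this diff). A self-contained sketch of the same mechanism on a toy module; the layer name, the toy model, and the manual STUDENT_FEATURES setup are illustrative stand-ins for what the loss class does internally:

import torch

from neural_compressor.compression.distillation import utility

student = torch.nn.Sequential()
student.add_module("fc", torch.nn.Linear(8, 4))
student.train()  # the student-side hook only records while the model is in training mode

# register_hooks_for_models normally assigns these dicts; done by hand here.
utility.STUDENT_FEATURES = {"fc": []}

handle = student.fc.register_forward_hook(utility.get_activation("fc", student=True))
student(torch.randn(2, 8))
assert len(utility.STUDENT_FEATURES["fc"]) == 1  # one recorded activation per forward pass
handle.remove()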
- """ - assert isinstance(param_dict, dict), "This criterion constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = {"reduction": "reduction"} - _dict = {} - for key in self._param_dict: - if key in _param_map: - if key == "reduction": - assert self._param_dict[key] in [ - "none", - "mean", - "sum", - ], "Supported reduction value is none, mean, sum" - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self): - """Call the PyTorchCrossEntropyLoss. - - Returns: - cls: criterion class. - param_dict(dict): param_dict - """ - return torch.nn.CrossEntropyLoss, self._mapping() - - class KnowledgeDistillationFramework(object): """Knowledge Distillation Framework.""" @@ -576,177 +463,6 @@ def __call__(self, **kwargs): return PyTorchKnowledgeDistillationLoss, self._param_check() -class TensorflowKnowledgeDistillationLoss(KnowledgeDistillationLoss): - """The TensorflowKnowledgeDistillationLoss class inherits from KnowledgeDistillationLoss.""" - - def __init__( - self, temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5], student_model=None, teacher_model=None - ): - """Initialize Tensorflow Knowledge Distillation Loss class. - - Args: - temperature (float, optional): Hyperparameters that control the entropy - of probability distributions. Defaults to 1.0. - loss_types (list, optional): loss types. Defaults to ['CE', 'CE']. - loss_weights (list, optional): loss weights. Defaults to [0.5, 0.5]. - student_model (optional): student model. Defaults to None. - teacher_model (optional): teacher model. Defaults to None. - - Raises: - NotImplementedError: NotImplementedError - NotImplementedError: NotImplementedError - """ - super(TensorflowKnowledgeDistillationLoss, self).__init__( - temperature=temperature, - loss_types=loss_types, - loss_weights=loss_weights, - student_model=student_model, - teacher_model=teacher_model, - ) - if self.student_targets_loss is None: - if self.loss_types[0] == "CE": - self.student_targets_loss = tf.keras.losses.SparseCategoricalCrossentropy() - else: - raise NotImplementedError( - "Now we only support CrossEntropyLoss " "for loss of student model output with respect to targets." - ) - logger.info("student_targets_loss: {}, {}".format(self.loss_types[0], self.loss_weights[0])) - if self.teacher_student_loss is None: - if self.loss_types[1] == "CE": - self.teacher_student_loss = self.SoftCrossEntropy - elif self.loss_types[1] == "KL": - self.teacher_student_loss = tf.keras.losses.KLDivergence() - else: - raise NotImplementedError( - "Now we only support CrossEntropyLoss" - " for loss of student model output with respect to teacher model output." - ) - logger.info("teacher_student_loss: {}, {}".format(self.loss_types[1], self.loss_weights[1])) - - def SoftCrossEntropy(self, targets, logits): - """Return SoftCrossEntropy. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: SoftCrossEntropy - """ - log_prob = tf.math.log(logits) - targets_prob = targets - return tf.math.reduce_mean(tf.math.reduce_sum(-targets_prob * log_prob, axis=-1), axis=-1) - - def teacher_model_forward(self, input, teacher_model=None): - """Teacher model forward. - - Args: - input (tensor): input data - teacher_model (optional): teacher model. Defaults to None. - device (torch.device, optional): device. Defaults to None. 
- - Returns: - tensor: output - """ - outputs = None - if self.loss_weights[1] > 0 and input is not None: - model = self.teacher_model if teacher_model is None else teacher_model - if isinstance(input, list) or isinstance(input, tuple): # pragma: no cover - outputs = model(*input, training=True) - elif isinstance(input, dict): # pragma: no cover - outputs = model(**input, training=True) - else: - outputs = model(input, training=True) - self.teacher_outputs = outputs - return outputs - - def teacher_student_loss_cal(self, student_outputs, teacher_outputs): - """Calculate loss between student model and teacher model. - - Args: - student_outputs (tensor): student outputs - teacher_outputs (tensor): teacher outputs - - Returns: - tensor: loss - """ - assert self.teacher_student_loss, "teacher_student_loss not specified." - return self.teacher_student_loss(teacher_outputs, student_outputs) - - def student_targets_loss_cal(self, student_outputs, targets): - """Calculate loss of student model. - - Args: - student_outputs (tensor): student outputs - targets (tensor): groud truth label - - Returns: - tensor: loss - """ - assert self.student_targets_loss, "student_targets_loss not specified." - return self.student_targets_loss(targets, student_outputs) - - def __call__(self, student_outputs, targets): - """Return loss of student model. - - Args: - student_outputs (tensor): student outputs - targets (tensor): groud truth label - - Returns: - tensor: loss - """ - tmp = student_outputs - student_outputs = targets - targets = tmp - return self.loss_cal(student_outputs, targets) - - -@criterion_registry("KnowledgeDistillationLoss", "tensorflow") -class TensorflowKnowledgeDistillationLossWrapper(object): - """TensorflowKnowledgeDistillationLossWrapper wraps TensorflowKnowledgeDistillationLoss.""" - - def __init__(self, param_dict): - """Initialize Tensorflow Knowledge Distillation Loss Wrapper. - - Args: - param_dict (dict): parameter dict - """ - self.param_dict = param_dict - - def _param_check(self): - param_dict = self.param_dict - _params = ["temperature", "loss_types", "loss_weights"] - assert all(key in param_dict for key in _params), "Keys {} must be in input parameters.".format(_params) - assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." - assert len(param_dict["loss_types"]) == len( - param_dict["loss_weights"] - ), "Length of loss_types and loss_weights must be the same." - assert all( - type(param_dict[k]) in [list, tuple] for k in ["loss_types", "loss_weights"] - ), "Type of loss_types and loss_weights must be list or tuple." - assert all( - any(isinstance(e, t) for t in [str, tf.keras]) for e in param_dict["loss_types"] - ), "Type of loss_types element must be str or torch Module." - assert ( - all(0.0 <= e <= 1.0 for e in param_dict["loss_weights"]) - and abs(sum(param_dict["loss_weights"]) - 1.0) < 1e-9 - ), "Element of loss_weights must be in interval [0, 1] and summed to 1.0." - new_dict = {} - for k in _params: - new_dict[k] = param_dict[k] - return new_dict - - def __call__(self, **kwargs): - """Return TensorflowKnowledgeDistillationLoss, param dict. 
- - Returns: - class: TensorflowKnowledgeDistillationLoss - param dict (dict): param dict - """ - return TensorflowKnowledgeDistillationLoss, self._param_check() - - class TensorflowKnowledgeDistillationLossExternal(KnowledgeDistillationLoss): """TensorflowKnowledgeDistillationLossExternal inherits from KnowledgeDistillationLoss.""" @@ -1399,224 +1115,3 @@ def loss_cal_sloss(self, student_outputs, teacher_outputs, student_loss): def __call__(self, student_outputs, targets): """Return 0.""" return 0 - - -class PyTorchSelfKnowledgeDistillationLoss(SelfKnowledgeDistillationLoss): - """PyTorch SelfKnowledge Distillation Loss.""" - - def __init__( - self, - layer_mappings=[], - loss_types=None, - loss_weights=None, - temperature=1.0, - add_origin_loss=False, - student_model=None, - teacher_model=None, - ): - """Initialize PyTorch SelfKnowledge Distillation Loss class. - - Args: - layer_mappings (list): layers of distillation.Format like - [[[student1_layer_name1, teacher_layer_name1],[student2_layer_name1, teacher_layer_name1]], - [[student1_layer_name2, teacher_layer_name2],[student2_layer_name2, teacher_layer_name2]]] - loss_types (list, optional): loss types. Defaults to ['CE'] * len(layer_mappings). - loss_weights (list, optional): loss weights. Defaults to [1.0 / len(layer_mappings)] * - len(layer_mappings).temperature (float, optional): use to calculate the soft label CE. - temperature (optional): temperature. Defaults to 1.0. - add_origin_loss (bool, optional): whether to add origin loss for hard label loss. - student_model (optional): student model. Defaults to None. - teacher_model (optional): teacher model. Defaults to None. - """ - super(PyTorchSelfKnowledgeDistillationLoss, self).__init__( - layer_mappings=layer_mappings, - loss_types=loss_types, - loss_weights=loss_weights, - temperature=temperature, - add_origin_loss=add_origin_loss, - student_model=student_model, - teacher_model=teacher_model, - ) - - def SoftCrossEntropy(self, logits, targets): - """Return SoftCrossEntropy. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: SoftCrossEntropy - """ - log_prob = torch.nn.functional.log_softmax(logits, dim=-1) - targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return (-targets_prob * log_prob).sum(dim=-1).mean() - - def KullbackLeiblerDivergence(self, logits, targets): - """Return KullbackLeiblerDivergence. - - Args: - logits (tensor): output logits - targets (tensor): ground truth label - - Returns: - tensor: KullbackLeiblerDivergence - """ - log_prob = torch.nn.functional.log_softmax(logits, dim=-1) - targets_prob = torch.nn.functional.softmax(targets, dim=-1) - return torch.nn.functional.kl_div(log_prob, targets_prob) - - def L2Divergence(self, feature1, feature2): - """Return L2Divergence. - - Args: - feature1 (tensor): feature1 value - feature2 (tensor): feature2 value - - Returns: - tensor: L2Divergence between feature1 and feature2 - """ - return torch.dist(feature1, feature2) - - def init_loss_funcs(self): - """Init loss funcs.""" - for loss_type in self.loss_types: - if loss_type == "CE": - loss_func = self.SoftCrossEntropy - elif loss_type == "KL": - loss_func = self.KullbackLeiblerDivergence - elif loss_type == "L2": - loss_func = self.L2Divergence - else: - raise NotImplementedError( - f"Unsupported loss type {loss_type}, supported loss is " - "CE for software CE, KL for Kullback-Leibler divergence and " - "L2 for L2 distance." 
- ) - self.loss_funcs.append(loss_func) - - def loss_cal(self, student_outputs): - """Calculate loss of student model. - - Args: - student_outputs (dict): student outputs - - Returns: - tensor: loss - """ - self.loss = torch.FloatTensor([0.0]) - tmp_loss = 0 - temperature = self.temperature - for loss_idx in range(len(self.layer_mappings)): - items = self.layer_mappings[loss_idx] - for idx in range(len(items)): - student_layer, teacher_layer = items[idx] - student_feature = student_outputs[student_layer] - teacher_feature = student_outputs[teacher_layer] - if loss_idx == 1: # soft logit - tmp_loss += ( - self.loss_funcs[loss_idx](student_feature / temperature, teacher_feature / temperature) - * self.loss_weights[loss_idx] - ) - else: # feature learning - tmp_loss += ( - self.loss_funcs[loss_idx](student_feature, teacher_feature) * self.loss_weights[loss_idx] - ) - if tmp_loss.device != self.loss.device: - self.loss = self.loss.to(tmp_loss.device) - self.loss += tmp_loss - return self.loss - - def teacher_model_forward(self, input, teacher_model=None, device=None): - """Teacher model forward. - - Args: - input (tensor): input data - teacher_model (torch.nn.model, optional): teacher model. Defaults to None. - device (torch.device, optional): device. Defaults to None. - - Returns: - tensor: output - """ - outputs = None - if self.loss_weights[1] > 0: - model = self.teacher_model if teacher_model is None else teacher_model - assert isinstance(model, torch.nn.Module), "Teacher model should be a torch Module instead of {}".format( - type(model) - ) - model.eval() - try: - model_device = next(model.parameters()).device - except: - logger.warning("Cannot get model device, assuming it's in CPU.") - model_device = "cpu" - device = model_device if device is None else device - if device != model_device: - model.to(device) - with torch.no_grad(): - outputs = pytorch_forward_wrapper(model, input, device=device) - self.teacher_outputs = outputs - return outputs - - -@criterion_registry("SelfKnowledgeDistillationLoss", "pytorch") -class PyTorchSelfKnowledgeDistillationLossWrapper(object): - """PyTorch SelfKnowledge Distillation Loss Wrapper.""" - - def __init__(self, param_dict): - """Initialize PyTorchSelfKnowledgeDistillationLossWrapper class. - - Args: - param_dict (dict): param dict - """ - self.param_dict = param_dict - - def _param_check(self): - param_dict = self.param_dict - _params = ["temperature", "layer_mappings", "loss_types", "loss_weights", "add_origin_loss"] - layer_mappings = param_dict["layer_mappings"] - if "loss_types" not in param_dict: - param_dict["loss_types"] = ["CE"] * len(layer_mappings) - if "loss_weights" not in param_dict: - param_dict["loss_weights"] = [1.0 / len(layer_mappings)] * len(layer_mappings) - if "add_origin_loss" not in param_dict: - param_dict["add_origin_loss"] = False - if "temperature" not in param_dict: - param_dict["temperature"] = 1.0 - assert "layer_mappings" in param_dict, "Key layer_mappings must be in input parameters." - assert all( - type(param_dict[k]) in [list, tuple] for k in ["layer_mappings", "loss_types", "loss_weights"] - ), "Type of loss_types and loss_weights must be list or tuple." - assert isinstance(param_dict["add_origin_loss"], bool), "Type of add_origin_loss should be bool." - assert ( - len(param_dict["layer_mappings"]) == len(param_dict["loss_types"]) == len(param_dict["loss_weights"]) - ), "Length of layer_mappings, loss_types and loss_weights must be the same." 
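Taken together, the checks in _param_check above (and the layer_mappings format documented in the class docstring) accept a parameter dict shaped like the following; the layer names are purely illustrative. Since this wrapper is being dropped from the experimental namespace by this patch, the sketch only records the accepted format:

self_distillation_params = {
    "temperature": 3.0,          # must be a positive value
    "add_origin_loss": False,    # bool; defaults to False when omitted
    # One entry per distillation stage; every inner pair is
    # [student_layer_output, teacher_layer_output].
    "layer_mappings": [
        [["resblock.1.feature.output", "resblock.deepst.feature.output"]],
        [["resblock.2.feature.output", "resblock.deepst.feature.output"]],
    ],
    "loss_types": ["L2", "KL"],  # CE, KL or L2; same length as layer_mappings
    "loss_weights": [0.5, 0.5],  # each element in [0, 1]
}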
- assert param_dict["temperature"] > 0.0, "Value of temperature must be positive." - for items in param_dict["layer_mappings"]: - assert all(type(it) in [list, tuple] and (len(it) == 2) for it in items), ( - "Elements of layer_mappings must be list or tuple and with length of 2." - + "element looks like ['resblock.1.feature.output," - + "'resblock.deepst.feature.output'], where " - + "'resblock.1.feature.output' and 'resblock.deepst.feature.output' " - + "represent resblock feature output of the student model and feature output of the" - + "teacher model respectively." - ) - assert all( - any(isinstance(e, t) for t in [str]) for e in param_dict["loss_types"] - ), "Type of loss_types element must be str." - assert all( - 0.0 <= e <= 1.0 for e in param_dict["loss_weights"] - ), "Element of loss_weights must be in interval [0, 1]." - new_dict = {} - for k in _params: - new_dict[k] = param_dict[k] - return new_dict - - def __call__(self, **kwargs): - """Return PyTorchSelfKnowledgeDistillationLoss, param dict. - - Returns: - class: PyTorchSelfKnowledgeDistillationLoss - param dict (dict): param dict - """ - return PyTorchSelfKnowledgeDistillationLoss, self._param_check() diff --git a/neural_compressor/experimental/common/optimizer.py b/neural_compressor/experimental/common/optimizer.py index 877c02ed3dc..2de24629cc7 100644 --- a/neural_compressor/experimental/common/optimizer.py +++ b/neural_compressor/experimental/common/optimizer.py @@ -167,38 +167,6 @@ def __call__(self, **kwargs): return tfa.optimizers.AdamW, self._mapping(**kwargs) -@optimizer_registry("Adam", "tensorflow") -class TensorFlowAdam(object): - """Tensorflow Adam optimizer. - - Args: - param_dict (dict): The dict of parameters setting by user for Adam optimizer - """ - - def __init__(self, param_dict): - """Initialize `TensorFlowAdam` class.""" - assert isinstance(param_dict, dict), "This optimizer constructor parameter must be a dict" - self._param_dict = param_dict - - def _mapping(self): - _param_map = { - "learning_rate": "learning_rate", - "beta_1": "beta_1", - "beta_2": "beta_2", - "epsilon": "epsilon", - "amsgrad": "amsgrad", - } - _dict = {} - for key in self._param_dict: - if key in _param_map: - _dict.update({_param_map[key]: self._param_dict[key]}) - return _dict - - def __call__(self, **kwargs): - """Call `TensorFlowAdam` object.""" - return tf.keras.optimizers.Adam, self._mapping(**kwargs) - - @optimizer_registry("SGD", "pytorch") class PyTorchSGD(object): """PyTorch SGD optimizer. 
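For downstream code, the net effect of the hunks above and below is a set of import-path moves out of the experimental namespace. A quick summary of the 2.x locations used throughout this patch (the commented lines are the 1.x paths that are no longer used):

# 1.x experimental paths no longer used after this patch:
#   from neural_compressor.experimental.common.criterion import TensorflowKnowledgeDistillationLoss
#   from neural_compressor.experimental.common import Criterions, Optimizers
#   from neural_compressor.experimental.common import torch_utils
#   from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader

# 2.x replacements:
from neural_compressor.compression.distillation.criterions import Criterions, TensorflowKnowledgeDistillationLoss
from neural_compressor.compression.distillation.optimizers import Optimizers
from neural_compressor.compression.distillation import utility
from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader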
diff --git a/neural_compressor/profiling/profiler/factory.py b/neural_compressor/profiling/profiler/factory.py index 2273e0375e0..566de6cabb6 100644 --- a/neural_compressor/profiling/profiler/factory.py +++ b/neural_compressor/profiling/profiler/factory.py @@ -15,7 +15,7 @@ """Profiling class factory.""" from typing import Optional -from neural_compressor.experimental.data.dataloaders.base_dataloader import BaseDataLoader +from neural_compressor.data.dataloaders.base_dataloader import BaseDataLoader from neural_compressor.model import BaseModel from neural_compressor.profiling.profiler.onnxrt_profiler.factory import ProfilerFactory as OnnxrtProfilerFactory from neural_compressor.profiling.profiler.profiler import Profiler diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py index 0317c557f6c..0baa6e35756 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/factory.py @@ -16,7 +16,7 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader +from neural_compressor.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.profiling.profiler.onnxrt_profiler.profiler import Profiler diff --git a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py index 3ec6c866d95..a248b73927c 100644 --- a/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/onnxrt_profiler/profiler.py @@ -18,7 +18,7 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader +from neural_compressor.data.dataloaders.onnxrt_dataloader import ONNXRTDataLoader from neural_compressor.model.onnx_model import ONNXModel from neural_compressor.profiling.profiler.onnxrt_profiler.utils import create_onnx_config from neural_compressor.profiling.profiler.profiler import Profiler as Parent diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py index 888e66eb53c..245bf307339 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/factory.py @@ -16,7 +16,7 @@ from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler from neural_compressor.profiling.profiler.tensorflow_profiler.profiler import Profiler as FrozenPbProfiler diff --git a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py index 06802d05b89..d5564a75c40 100644 --- a/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py +++ b/neural_compressor/profiling/profiler/tensorflow_profiler/profiler.py @@ -17,7 +17,7 @@ from pathlib import Path from typing import Optional -from neural_compressor.experimental.data.dataloaders.tensorflow_dataloader import 
TensorflowDataLoader +from neural_compressor.data.dataloaders.tensorflow_dataloader import TensorflowDataLoader from neural_compressor.model.tensorflow_model import TensorflowBaseModel from neural_compressor.profiling.profiler.profiler import Profiler as Parent diff --git a/neural_compressor/utils/create_obj_from_config.py b/neural_compressor/utils/create_obj_from_config.py index 6db34e3bf55..fa1ddafca95 100644 --- a/neural_compressor/utils/create_obj_from_config.py +++ b/neural_compressor/utils/create_obj_from_config.py @@ -189,7 +189,8 @@ def create_train_func(framework, dataloader, adaptor, train_cfg, hooks=None, cal assert dataloader, "dataloader should NOT be empty when train_func is None" assert adaptor, "adaptor should NOT be empty" - from neural_compressor.experimental.common import Criterions, Optimizers + from neural_compressor.compression.distillation.criterions import Criterions + from neural_compressor.compression.distillation.optimizers import Optimizers postprocess_cfg = train_cfg.postprocess if postprocess_cfg is not None: diff --git a/test/distillation/test_distillation_1.x.py b/test/distillation/test_distillation_1.x.py index d754b6dea88..802f81148a5 100644 --- a/test/distillation/test_distillation_1.x.py +++ b/test/distillation/test_distillation_1.x.py @@ -261,19 +261,6 @@ def test_distillation_external_new_API(self): stat = torch.load("./saved/best_model.pt") opt_model = self.student_model.load_state_dict(stat) - @unittest.skipIf(version1_lt_version2(tf.version.VERSION, "2.3.0"), " keras requires higher version than tf-2.3.0") - def test_tf_distillation(self): - from neural_compressor.conf.config import DistillationConf - from neural_compressor.experimental import Distillation - - conf = DistillationConf("fake_1.yaml") - distiller = Distillation(conf) - distiller = Distillation("fake_1.yaml") - distiller.student_model = self.student_model_tf - distiller.teacher_model = self.teacher_model_tf - print("student model: {}".format(distiller.student_model)) - _ = distiller.fit() - if __name__ == "__main__": unittest.main()
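The deleted test_tf_distillation above exercised the 1.x DistillationConf/Distillation API that this patch is moving away from. For reference, the 2.x replacement goes through prepare_compression with a DistillationConfig; the sketch below is adapted from the documented 2.x PyTorch flow and should be treated as an assumption to verify against the 2.x docs rather than as part of this diff (a TensorFlow equivalent is not established here):

import torch

from neural_compressor.config import DistillationConfig, KnowledgeDistillationLossConfig
from neural_compressor.training import prepare_compression

# Toy student/teacher pair and synthetic data, for illustration only.
student = torch.nn.Sequential(torch.nn.Linear(8, 4))
teacher = torch.nn.Sequential(torch.nn.Linear(8, 4))
data = [(torch.randn(2, 8), torch.randint(0, 4, (2,))) for _ in range(4)]

criterion_conf = KnowledgeDistillationLossConfig(temperature=1.0, loss_types=["CE", "CE"], loss_weights=[0.5, 0.5])
conf = DistillationConfig(teacher_model=teacher, criterion=criterion_conf)
compression_manager = prepare_compression(student, conf)

optimizer = torch.optim.SGD(student.parameters(), lr=0.01)
hard_loss = torch.nn.CrossEntropyLoss()

compression_manager.callbacks.on_train_begin()
for x, y in data:
    optimizer.zero_grad()
    outputs = student(x)
    loss = hard_loss(outputs, y)
    # The callback folds the teacher/student distillation loss into the hard loss.
    loss = compression_manager.callbacks.on_after_compute_loss(x, outputs, loss)
    loss.backward()
    optimizer.step()
compression_manager.callbacks.on_train_end()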