diff --git a/nemo/backends/pytorch/common/other.py b/nemo/backends/pytorch/common/other.py
index 42fe4ec5e0d0..e86957d22f7d 100644
--- a/nemo/backends/pytorch/common/other.py
+++ b/nemo/backends/pytorch/common/other.py
@@ -11,7 +11,6 @@
 import torch.nn as nn
 
 from nemo.backends.pytorch.nm import NonTrainableNM, TrainableNM
-from nemo.core import NeuralModule
 from nemo.core.neural_types import *
 
 
@@ -21,14 +20,14 @@ def input_ports(self):
         """Returns definitions of module input ports.
         """
         # return {"input_seq": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag)})}
-        return {"input_seq": NeuralModule(ChannelType(), ('T', 'B'))}
+        return {"input_seq": NeuralType(('B', 'T'))}
 
     @property
     def output_ports(self):
         """Returns definitions of module output ports.
         """
         # return {"outputs": NeuralType({0: AxisType(TimeTag), 1: AxisType(BatchTag), 2: AxisType(ChannelTag),})}
-        return {"outputs": NeuralType(('T', 'B', 'D'), ChannelType())}
+        return {"outputs": NeuralType(('B', 'T', 'C'))}
 
     def __init__(self, voc_size, hidden_size, dropout=0.0):
         super().__init__()
diff --git a/nemo/core/neural_types/neural_type.py b/nemo/core/neural_types/neural_type.py
index a2070c354b3c..a7ad02577adc 100644
--- a/nemo/core/neural_types/neural_type.py
+++ b/nemo/core/neural_types/neural_type.py
@@ -21,7 +21,6 @@
     'NeuralTypeError',
     'NeuralPortNameMismatchError',
     'NeuralPortNmTensorMismatchError',
-    'NeuralPortNmTensorMismatchError',
     'CanNotInferResultNeuralType',
 ]
 import uuid
@@ -46,6 +45,15 @@ class NeuralType(object):
         type can be optional.
     """
 
+    def __str__(self):
+        return (
+            f"axes: {[(c.kind, c.size, c.is_list) for c in self.axes]}\n"
+            f"elements_type: {self.elements_type.__class__.__name__}"
+        )
+        # return f"axes: {self.axes}"  # " elements_type: {self.elements_type}"
+        # return f" elements_type: {self.elements_type.__class__.__name__}"
+        # return "help"
+
     def __init__(self, axes: Optional[Tuple] = None, elements_type: ElementType = VoidType(), optional=False):
         if not isinstance(elements_type, ElementType):
             raise ValueError(
@@ -87,6 +95,8 @@ def compare(self, second) -> NeuralTypeComparisonResult:
         dimensions_pass = NeuralType.__compare_axes(axes_a, axes_b)
         element_comparison_result = self.elements_type.compare(second.elements_type)
+        if isinstance(second.elements_type, VoidType):
+            element_comparison_result = NeuralTypeComparisonResult.SAME
 
         # SAME DIMS
         if dimensions_pass == 0:
diff --git a/tests/core/test_weight_share.py b/tests/core/test_weight_share.py
index f4468bf596fb..3fd9fcd1b9d1 100644
--- a/tests/core/test_weight_share.py
+++ b/tests/core/test_weight_share.py
@@ -27,9 +27,12 @@
 import nemo
 import nemo.collections.asr as nemo_asr
+from nemo.collections.nlp.nm.trainables.common import TokenClassifier
+from nemo.collections.nlp.nm.losses import PaddedSmoothedCrossEntropyLossNM
 from nemo.core import WeightShareTransform
 from nemo.core.neural_types import *
 from tests.common_setup import NeMoUnitTest
+from nemo.backends.pytorch.nm import DataLayerNM
 
 logging = nemo.logging
 
@@ -136,26 +139,69 @@ def test_TaylorNet_get_weights(self):
         # tn2.fc1.bias.data = torch.tensor([0.1])
         # self.assertTrue(self.__check_if_weights_are_equal(tn1.get_weights(), tn2.get_weights()))
 
-    # def test_tie_weights2(self):
-    #     voc_size = 3
-    #     dim = 2
-    #     embd = nemo.backends.pytorch.common.other.SequenceEmbedding(voc_size=voc_size, hidden_size=dim)
-    #     proj = nemo.backends.pytorch.common.other.SequenceProjection(from_dim=dim, to_dim=voc_size)
-    #     embd.tie_weights_with(
-    #         proj,
-    #         weight_names=["embedding.weight"],
-    #         name2name_and_transform={"embedding.weight": ("projection.weight", WeightShareTransform.SAME,)},
-    #     )
-    #     self.assertTrue(
-    #         np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),)
-    #     )
-    #     was = embd.embedding.weight.detach().numpy()
-    #     embd.embedding.weight.data = torch.tensor(np.random.randint(0, 10, (3, 2)) * 1.0)
-    #     after = embd.embedding.weight.detach().numpy()
-    #     self.assertTrue(
-    #         np.array_equal(embd.embedding.weight.detach().numpy(), proj.projection.weight.detach().numpy(),)
-    #     )
-    #     self.assertFalse(np.array_equal(was, after))
+    def test_tie_weights(self):
+        class DummyDataLayer(DataLayerNM):
+            def __init__(self, vocab_size):
+                super().__init__()
+                self.vocab_size = vocab_size
+
+                class DummyDS(torch.utils.data.Dataset):
+                    def __init__(self, vocab_size):
+                        super().__init__()
+
+                    def __getitem__(self, index):
+                        model_inputs = torch.randint(high=vocab_size, size=[10])
+                        model_outputs = torch.randint(high=vocab_size, size=[10])
+                        return (model_inputs, model_outputs)
+
+                    def __len__(self):
+                        return 10
+
+                self._dataset = DummyDS(vocab_size)
+
+            @property
+            def output_ports(self):
+                return {
+                    "model_inputs": NeuralType(('B', 'T')),
+                    "model_outputs": NeuralType(('B', 'T')),
+                }
+
+            def __len__(self):
+                return len(self._dataset)
+
+            @property
+            def dataset(self):
+                return self._dataset
+
+            def data_iterator(self):
+                pass
+
+        voc_size = 10
+        dim = 10
+        embd = nemo.backends.pytorch.common.other.SequenceEmbedding(voc_size=voc_size, hidden_size=dim)
+        proj = TokenClassifier(hidden_size=dim, num_classes=voc_size)
+        data = DummyDataLayer(voc_size)
+        loss = PaddedSmoothedCrossEntropyLossNM(0)
+        embd.tie_weights_with(
+            proj,
+            weight_names=["embedding.weight"],
+            name2name_and_transform={"embedding.weight": ("mlp.layer2.weight", WeightShareTransform.SAME)},
+        )
+        self.assertTrue(
+            np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy())
+        )
+        _in, _out = data()
+        pred = embd(input_seq=_in)
+        pred = proj(hidden_states=pred)
+        loss_t = loss(target_ids=_in, logits=pred)
+
+        self.nf.train(
+            [loss_t], optimizer="sgd", optimization_params={"max_steps": 5, "lr": 0.0003},
+        )
+
+        self.assertTrue(
+            np.array_equal(embd.embedding.weight.detach().cpu().numpy(), proj.mlp.layer2.weight.detach().cpu().numpy())
+        )
 
     def test_set_weights(self):
         voc_size = 3