diff --git a/nemo/backends/pytorch/common/multi_data.py b/nemo/backends/pytorch/common/multi_data.py
index ab454b67a306..ae0ec26a7585 100644
--- a/nemo/backends/pytorch/common/multi_data.py
+++ b/nemo/backends/pytorch/common/multi_data.py
@@ -100,7 +100,15 @@ def __init__(self, datasets: List[torch.utils.data.Dataset], combination_mode: s
         """
         self.datasets = datasets
         self.combination_mode = combination_mode
-        self.len = None
+        if self.combination_mode == "cross_product":
+            self.len = np.prod([len(d) for d in self.datasets])
+        elif self.combination_mode == "zip":
+            ds_lens = [len(d) for d in self.datasets]
+            self.len = np.min(ds_lens)
+            if len(set(ds_lens)) != 1:
+                raise ValueError("datasets do not have equal lengths.")
+        else:
+            raise ValueError("combination_mode unknown")
 
     def __getitem__(self, i):
         """
@@ -115,14 +123,4 @@ def __len__(self):
         In case of  combination_mode="cross_product" this would be prod(len(d) for d in self.datasets). 
         In case of  combination_mode="zip" this would be min(len(d) for d in self.datasets) given that all datasets have same length. 
         """
-        if not self.len:
-            if self.combination_mode == "cross_product":
-                self.len = np.prod([len(d) for d in self.datasets])
-            elif self.combination_mode == "zip":
-                ds_lens = [len(d) for d in self.datasets]
-                self.len = np.min(ds_lens)
-                if not np.all(ds_lens):
-                    logging.warning("datasets do not have equal lengths and will be pruned to the shortest length.")
-            else:
-                raise ValueError("combination_mode unknown")
         return self.len
diff --git a/tests/integration/test_integration_multidataset.py b/tests/integration/test_integration_multidataset.py
new file mode 100644
index 000000000000..35491f620aa6
--- /dev/null
+++ b/tests/integration/test_integration_multidataset.py
@@ -0,0 +1,68 @@
+# ! /usr/bin/python
+# -*- coding: utf-8 -*-
+
+# Copyright 2020 NVIDIA. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# =============================================================================
+
+import os
+import shutil
+from unittest import TestCase
+
+import pytest
+import torch
+
+import nemo
+from nemo.core import ChannelType, NeuralType
+
+logging = nemo.logging
+
+
+@pytest.mark.usefixtures("neural_factory")
+class TestMultiDLIntegration(TestCase):
+    @classmethod
+    def setUpClass(cls) -> None:
+        super().setUpClass()
+
+    @pytest.mark.integration
+    def test_pipeline(self):
+        batch_size = 4
+        dataset_size_0 = 100
+        dataset_size_1 = 100
+        shuffle = False
+        dl_1 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_0)
+        dl_2 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_1)
+
+        data_layer = nemo.backends.pytorch.common.MultiDataLayer(
+            data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode="zip"
+        )
+        x_0, y_0, x_1, y_1 = data_layer()
+
+        trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
+        loss = nemo.backends.pytorch.tutorials.MSELoss()
+        combined_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2)
+        pred_0 = trainable_module(x=x_0)
+        pred_1 = trainable_module(x=x_1)
+        l_0 = loss(predictions=pred_0, target=y_0)
+        l_1 = loss(predictions=pred_1, target=y_1)
+        total_loss = combined_loss(loss_1=l_0, loss_2=l_1)
+
+        callback = nemo.core.SimpleLossLoggerCallback(
+            tensors=[total_loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'),
+        )
+        # Instantiate an optimizer to perform `train` action
+        optimizer = nemo.backends.pytorch.actions.PtActions()
+        optimizer.train(
+            tensors_to_optimize=[total_loss], optimizer="sgd", optimization_params={"lr": 0.0003, "max_steps": 2},
+        )
diff --git a/tests/unclassified/test_unclassified_multidataset.py b/tests/unit/test_unit_multidataset.py
similarity index 63%
rename from tests/unclassified/test_unclassified_multidataset.py
rename to tests/unit/test_unit_multidataset.py
index 21fe9aa81bd4..f356c61422db 100644
--- a/tests/unclassified/test_unclassified_multidataset.py
+++ b/tests/unit/test_unit_multidataset.py
@@ -24,18 +24,18 @@
 import torch
 
 import nemo
-from nemo.core import ChannelType, LabelsType, MaskType, NeuralType
+from nemo.core import ChannelType, NeuralType
 
 logging = nemo.logging
 
 
 @pytest.mark.usefixtures("neural_factory")
-class TestMultiDL(TestCase):
+class TestMultiDLUnit(TestCase):
     @classmethod
     def setUpClass(cls) -> None:
         super().setUpClass()
 
-    @pytest.mark.unclassified
+    @pytest.mark.unit
     def test_port_name_collision_handling(self):
         batch_size = 4
         dataset_size = 4
@@ -59,7 +59,7 @@ def test_port_name_collision_handling(self):
         self.assertEqual([*data_layer.output_ports], ["a", "b", "a_1", "c"])
         self.assertEqual(len(data_layer), dataset_size * dataset_size)
 
-    @pytest.mark.unclassified
+    @pytest.mark.unit
     def test_port_renaming(self):
         batch_size = 4
         dataset_size = 4
@@ -86,11 +86,12 @@ def test_port_renaming(self):
         )
         self.assertEqual([*data_layer.output_ports], ["1", "2", "3", "4"])
 
-    @pytest.mark.unclassified
-    def test_multi_dl_zip(self):
+    @pytest.mark.unit
+    def test_multi_dl_zip_working(self):
+        dataset_size_0 = 2
+        dataset_size_1 = 2
+        final_dataset_size = 2
         batch_size = 4
-        dataset_size_0 = 4
-        dataset_size_1 = 5
         shuffle = False
         dl_1 = nemo.backends.pytorch.common.ZerosDataLayer(
             size=dataset_size_0,
@@ -108,36 +109,53 @@ def test_multi_dl_zip(self):
         data_layer = nemo.backends.pytorch.common.MultiDataLayer(
             data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode="zip"
         )
-        self.assertEqual(len(data_layer), dataset_size_0)
+        self.assertEqual(len(data_layer), final_dataset_size)
 
-    @pytest.mark.unclassified
-    def test_pipeline(self):
+    @pytest.mark.unit
+    def test_multi_dl_zip_failing(self):
+        dataset_size_0 = 4
+        dataset_size_1 = 2
         batch_size = 4
-        dataset_size_0 = 100
-        dataset_size_1 = 100
         shuffle = False
-        dl_1 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_0)
-        dl_2 = nemo.backends.pytorch.tutorials.RealFunctionDataLayer(batch_size=batch_size, n=dataset_size_1)
-
-        data_layer = nemo.backends.pytorch.common.MultiDataLayer(
-            data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode="zip"
+        dl_1 = nemo.backends.pytorch.common.ZerosDataLayer(
+            size=dataset_size_0,
+            dtype=torch.FloatTensor,
+            batch_size=batch_size,
+            output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())},
         )
-        x_0, y_0, x_1, y_1 = data_layer()
-
-        trainable_module = nemo.backends.pytorch.tutorials.TaylorNet(dim=4)
-        loss = nemo.backends.pytorch.tutorials.MSELoss()
-        combined_loss = nemo.backends.pytorch.common.losses.LossAggregatorNM(num_inputs=2)
-        pred_0 = trainable_module(x=x_0)
-        pred_1 = trainable_module(x=x_1)
-        l_0 = loss(predictions=pred_0, target=y_0)
-        l_1 = loss(predictions=pred_1, target=y_1)
-        total_loss = combined_loss(loss_1=l_0, loss_2=l_1)
-
-        callback = nemo.core.SimpleLossLoggerCallback(
-            tensors=[total_loss], print_func=lambda x: logging.info(f'Train Loss: {str(x[0].item())}'),
+        dl_2 = nemo.backends.pytorch.common.ZerosDataLayer(
+            size=dataset_size_1,
+            dtype=torch.FloatTensor,
+            batch_size=batch_size,
+            output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())},
         )
-        # Instantiate an optimizer to perform `train` action
-        optimizer = nemo.backends.pytorch.actions.PtActions()
-        optimizer.train(
-            tensors_to_optimize=[total_loss], optimizer="sgd", optimization_params={"lr": 0.0003, "num_epochs": 1},
+
+        with pytest.raises(ValueError):
+            data_layer = nemo.backends.pytorch.common.MultiDataLayer(
+                data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode="zip"
+            )
+
+    @pytest.mark.unit
+    def test_multi_dl_wrong_combination(self):
+        dataset_size_0 = 2
+        dataset_size_1 = 2
+        unknown_combination = "cross"
+        batch_size = 4
+        shuffle = False
+        dl_1 = nemo.backends.pytorch.common.ZerosDataLayer(
+            size=dataset_size_0,
+            dtype=torch.FloatTensor,
+            batch_size=batch_size,
+            output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "b": NeuralType(('B', 'T'), ChannelType())},
         )
+        dl_2 = nemo.backends.pytorch.common.ZerosDataLayer(
+            size=dataset_size_1,
+            dtype=torch.FloatTensor,
+            batch_size=batch_size,
+            output_ports={"a": NeuralType(('B', 'T'), ChannelType()), "c": NeuralType(('B', 'T'), ChannelType())},
+        )
+
+        with pytest.raises(ValueError):
+            data_layer = nemo.backends.pytorch.common.MultiDataLayer(
+                data_layers=[dl_1, dl_2], batch_size=batch_size, shuffle=shuffle, combination_mode=unknown_combination
+            )