diff --git a/deepspeed/runtime/pipe/p2p.py b/deepspeed/runtime/pipe/p2p.py
index 6f3b2840152b..24c0f250a4b9 100644
--- a/deepspeed/runtime/pipe/p2p.py
+++ b/deepspeed/runtime/pipe/p2p.py
@@ -14,7 +14,7 @@ def init_process_groups(grid):
     global _groups, _grid
     _grid = grid
 
-    assert _grid.pipe_parallel_size > 1, "There is no model parallelism"
+    assert _grid.pipe_parallel_size > 1, "There is no pipeline parallelism"
 
     _groups = [dist.new_group(ranks=group) for group in _grid.p2p_groups]
 
diff --git a/deepspeed/runtime/zero/stage2.py b/deepspeed/runtime/zero/stage2.py
index c8835d178eff..1a590ae51827 100755
--- a/deepspeed/runtime/zero/stage2.py
+++ b/deepspeed/runtime/zero/stage2.py
@@ -260,7 +260,7 @@ def __init__(self,
             for p, q in zip(self.fp16_groups[i], updated_params):
                 p.data = q.data
 
-            #divide the flat weights into near equal paritition equal to the data parallel degree
+            #divide the flat weights into near-equal partitions, with the number of partitions equal to the data parallel degree
             #each process will compute on a different part of the partition
             data_parallel_partitions = self.get_data_parallel_partitions(
                 self.fp16_groups_flat[i])
@@ -367,10 +367,10 @@ def __init__(self,
         #stores the offset at which a parameter gradient needs to be inserted in a partition
         self.grad_partition_insertion_offset = {}
 
-        #the offset in the gradient at which it must be inserted at the beginning of the paritition
+        #the offset in the gradient at which it must be inserted at the beginning of the partition
         self.grad_start_offset = {}
 
-        #will store the averaged gradients required by this parititon
+        #will store the averaged gradients required by this partition
         self.averaged_gradients = {}
 
         # store index of first parameter in each partition