Commit: fixes

abacaj committed Sep 8, 2023
1 parent e914abb commit 4e6a503
Showing 3 changed files with 14 additions and 9 deletions.

megatron/initialize.py (8 changes: 4 additions & 4 deletions)

@@ -130,10 +130,10 @@ def _initialize_torch_distributed(args: argparse.Namespace):
     args.local_rank = device
     torch.cuda.set_device(device)
     # Call the init process
-    if args.cross_block_networking or args.force_socket_networking:
-        os.environ["NCCL_NET"] = "Socket"
-    else:
-        os.environ["NCCL_NET"] = "IB"
+    # if args.cross_block_networking or args.force_socket_networking:
+    #     os.environ["NCCL_NET"] = "Socket"
+    # else:
+    #     os.environ["NCCL_NET"] = "IB"
     torch.distributed.init_process_group(
         backend=args.default_backend,
         world_size=args.world_size,
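
The removed block had pinned NCCL's transport by setting the NCCL_NET environment variable; with it commented out, NCCL is left to auto-detect the fastest transport available. A minimal sketch of the same override pattern, with an illustrative function and flag name (only NCCL_NET and its "Socket"/"IB" values appear in the diff):

import os

import torch.distributed


def init_distributed(rank: int, world_size: int, force_socket: bool = False) -> None:
    # NCCL reads its NCCL_* environment variables when the first
    # communicator is created, so any override must be set before
    # torch.distributed.init_process_group() runs.
    if force_socket:
        # Pin the plain TCP/socket transport, e.g. on hosts without
        # working InfiniBand.
        os.environ["NCCL_NET"] = "Socket"
    # Otherwise leave NCCL_NET unset and let NCCL probe for the fastest
    # transport (InfiniBand when present, sockets otherwise), which is
    # the behavior this commit restores.
    torch.distributed.init_process_group(
        backend="nccl", rank=rank, world_size=world_size
    )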

megatron/mpu/initialize.py (9 changes: 6 additions & 3 deletions)

@@ -631,17 +631,20 @@ def force_communicator_creation(
         torch.distributed.barrier(group=group)
     if all_reduce:
         one_tensor = torch.cuda.FloatTensor([1.0])
-        torch.distributed.all_reduce(one_tensor, op=torch.distributed.ReduceOp.SUM, group=group)
+        if world_size and world_size > 1:
+            torch.distributed.all_reduce(one_tensor, op=torch.distributed.ReduceOp.SUM)
     if all_gather:
         assert (
             rank is not None and world_size is not None
         ), "Must supply rank and world_size for all_gather initialization"
         tensor_list = [torch.empty_like(one_tensor) for _ in range(world_size)]
         tensor_list[rank] = one_tensor
-        torch.distributed.all_gather(tensor_list, one_tensor, group=group)
+        if world_size > 1:
+            torch.distributed.all_gather(tensor_list, one_tensor)
     if broadcast:
         one_tensor = torch.cuda.FloatTensor([1.0])
-        torch.distributed.broadcast(one_tensor, src_rank, group=group)
+        if world_size > 1:
+            torch.distributed.broadcast(one_tensor, src_rank)
 
 
 def force_pipeline_communicator_creation(ignore_virtual=False):
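
The change above guards each warm-up collective with a world-size check, so single-process runs skip NCCL communicator creation entirely; note that the guarded calls also drop group=group and therefore now target the default process group. A standalone sketch of that guard, with an illustrative function name rather than the repository's API:

import torch
import torch.distributed as dist


def warm_up_collectives(rank: int, world_size: int, src_rank: int = 0) -> None:
    # Collectives create their NCCL communicators lazily on first use;
    # with a single rank there is nothing to create, so skip them all.
    if world_size <= 1:
        return
    one_tensor = torch.cuda.FloatTensor([1.0])
    dist.all_reduce(one_tensor, op=dist.ReduceOp.SUM)
    tensor_list = [torch.empty_like(one_tensor) for _ in range(world_size)]
    tensor_list[rank] = one_tensor
    dist.all_gather(tensor_list, one_tensor)
    dist.broadcast(one_tensor, src=src_rank)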

megatron/text_generation_server.py (6 changes: 4 additions & 2 deletions)

@@ -582,16 +582,18 @@ def put(self) -> Union[Tuple[str, int], str]:
             (
                 response,
                 response_logprobs,
-                _,
+                all_tokens,
                 generations,
                 _,
                 human_readable_tokens,
             ) = retval
-
         end_time = datetime.datetime.now()
         print("Query latency: ", end_time - start_time, flush=True)
+        print(all_tokens)
         output = {
             "text": response,
+            "query_time_ms": (end_time - start_time).total_seconds() * 1000,
+            "tokens_generated": sum([len(t) for t in all_tokens]),
             "logprobs": response_logprobs,
             "generations": generations,
             "human_readable_tokens": human_readable_tokens,
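
With these additions the server's JSON response reports per-query latency in milliseconds and the number of tokens generated alongside the text. A hypothetical client call for the PUT handler above; the URL, port, and request payload are assumptions, and only the response fields visible in the diff come from the source:

import requests

# Endpoint and request schema are illustrative; the diff only shows that
# the route answers PUT and returns the JSON fields read below.
resp = requests.put(
    "http://localhost:5000/generate",
    json={"prompts": ["Hello, world"]},
)
data = resp.json()
print(data["text"])
print(f'{data["query_time_ms"]:.1f} ms, {data["tokens_generated"]} tokens')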
