From 0e86693b8039acf58711c85f067436c3cbc66a0b Mon Sep 17 00:00:00 2001
From: Steve Luo <36296769+SunflowerAries@users.noreply.github.com>
Date: Tue, 14 Jan 2025 04:43:51 +0800
Subject: [PATCH] [Bugfix] Fix deepseekv3 gate bias error (#12002)

Signed-off-by: mgoin
Co-authored-by: mgoin
---
 .../layers/fused_moe/fused_moe.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 3ea6217d7c0ef..308c1d6ac6db1 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -497,7 +497,10 @@ def grouped_topk(hidden_states: torch.Tensor,
         raise ValueError(f"Unsupported scoring function: {scoring_func}")
 
     if e_score_correction_bias is not None:
-        scores.add_(e_score_correction_bias.unsqueeze(0))
+        # Store original scores before applying correction bias. We use biased
+        # scores for expert selection but original scores for routing weights
+        original_scores = scores
+        scores = scores + e_score_correction_bias.unsqueeze(0)
 
     num_token = scores.shape[0]
     group_scores = scores.view(num_token, num_expert_group,
@@ -510,10 +513,16 @@
         num_token, num_expert_group,
         scores.shape[-1] // num_expert_group).reshape(num_token, -1)  # [n, e]
     tmp_scores = scores.masked_fill(~score_mask.bool(), 0.0)  # [n, e]
-    topk_weights, topk_ids = torch.topk(tmp_scores,
-                                        k=topk,
-                                        dim=-1,
-                                        sorted=False)
+
+    if e_score_correction_bias is not None:
+        topk_ids = torch.topk(tmp_scores, k=topk, dim=-1, sorted=False)[1]
+        # Use original unbiased scores for the routing weights
+        topk_weights = original_scores.gather(1, topk_ids)
+    else:
+        topk_weights, topk_ids = torch.topk(tmp_scores,
+                                            k=topk,
+                                            dim=-1,
+                                            sorted=False)
 
     if renormalize:
         topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
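
Note (added for illustration, not part of the patch): the sketch below condenses the intent of the change, assuming a sigmoid-scored, DeepSeek-V3-style gate and omitting the expert-group masking that grouped_topk performs; the helper name is hypothetical. Bias-corrected scores decide which experts are selected, while the unbiased scores supply the routing weights that are then renormalized.

# Standalone sketch, not vLLM code: hypothetical helper showing the
# select-with-bias / weight-without-bias pattern from the patch above.
import torch


def select_with_bias(gating_output: torch.Tensor, bias: torch.Tensor,
                     topk: int):
    scores = gating_output.sigmoid()      # original (unbiased) scores
    biased = scores + bias.unsqueeze(0)   # biased copy, used only for selection
    topk_ids = torch.topk(biased, k=topk, dim=-1, sorted=False)[1]
    # Routing weights come from the unbiased scores of the selected experts.
    topk_weights = scores.gather(1, topk_ids)
    topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    return topk_weights, topk_ids


# Example: 2 tokens routed over 8 experts, top-2 experts per token.
weights, ids = select_with_bias(torch.randn(2, 8), torch.randn(8), topk=2)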