From 0e86693b8039acf58711c85f067436c3cbc66a0b Mon Sep 17 00:00:00 2001
From: Steve Luo <36296769+SunflowerAries@users.noreply.github.com>
Date: Tue, 14 Jan 2025 04:43:51 +0800
Subject: [PATCH] [Bugfix] Fix deepseekv3 gate bias error (#12002)

Signed-off-by: mgoin
Co-authored-by: mgoin
---
 .../layers/fused_moe/fused_moe.py | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py
index 3ea6217d7c0ef..308c1d6ac6db1 100644
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -497,7 +497,10 @@ def grouped_topk(hidden_states: torch.Tensor,
         raise ValueError(f"Unsupported scoring function: {scoring_func}")
 
     if e_score_correction_bias is not None:
-        scores.add_(e_score_correction_bias.unsqueeze(0))
+        # Store original scores before applying correction bias. We use biased
+        # scores for expert selection but original scores for routing weights
+        original_scores = scores
+        scores = scores + e_score_correction_bias.unsqueeze(0)
 
     num_token = scores.shape[0]
     group_scores = scores.view(num_token, num_expert_group,
@@ -510,10 +513,16 @@
         num_token, num_expert_group,
         scores.shape[-1] // num_expert_group).reshape(num_token, -1)  # [n, e]
     tmp_scores = scores.masked_fill(~score_mask.bool(), 0.0)  # [n, e]
-    topk_weights, topk_ids = torch.topk(tmp_scores,
-                                        k=topk,
-                                        dim=-1,
-                                        sorted=False)
+
+    if e_score_correction_bias is not None:
+        topk_ids = torch.topk(tmp_scores, k=topk, dim=-1, sorted=False)[1]
+        # Use original unbiased scores for the routing weights
+        topk_weights = original_scores.gather(1, topk_ids)
+    else:
+        topk_weights, topk_ids = torch.topk(tmp_scores,
+                                            k=topk,
+                                            dim=-1,
+                                            sorted=False)
 
     if renormalize:
         topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
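
Note (added for illustration, not part of the patch): the sketch below condenses the intent of the change, assuming a sigmoid-scored, DeepSeek-V3-style gate and omitting the expert-group masking that grouped_topk performs; the helper name is hypothetical. Bias-corrected scores decide which experts are selected, while the unbiased scores supply the routing weights that are then renormalized.

# Standalone sketch, not vLLM code: hypothetical helper showing the
# select-with-bias / weight-without-bias pattern from the patch above.
import torch


def select_with_bias(gating_output: torch.Tensor, bias: torch.Tensor,
                     topk: int):
    scores = gating_output.sigmoid()      # original (unbiased) scores
    biased = scores + bias.unsqueeze(0)   # biased copy, used only for selection
    topk_ids = torch.topk(biased, k=topk, dim=-1, sorted=False)[1]
    # Routing weights come from the unbiased scores of the selected experts.
    topk_weights = scores.gather(1, topk_ids)
    topk_weights = topk_weights / topk_weights.sum(dim=-1, keepdim=True)
    return topk_weights, topk_ids


# Example: 2 tokens routed over 8 experts, top-2 experts per token.
weights, ids = select_with_bias(torch.randn(2, 8), torch.randn(8), topk=2)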