diff --git a/vllm/hpu/ops.py b/vllm/hpu/ops.py index 1f2e07bd59ccb..fa9d5ff521a6a 100644 --- a/vllm/hpu/ops.py +++ b/vllm/hpu/ops.py @@ -125,7 +125,6 @@ def silu_and_mul_wrapper(x: torch.Tensor) -> torch.Tensor: return out -@hpu_utils.with_mark_steps def static_fused_moe(hidden_states, w1, w2, score, topk): B, D = hidden_states.shape num_experts = w1.shape[0] @@ -142,6 +141,8 @@ def static_fused_moe(hidden_states, w1, w2, score, topk): padded_weights = padded_weights.reshape(-1, B, w1.shape[0]) padded_weights = padded_weights.permute(2, 0, 1).unsqueeze(-1) + htorch.core.mark_step() + for expert_idx in range(num_experts): padded_weight = padded_weights[expert_idx] current_state_static = hidden_states.reshape(-1, D) @@ -149,5 +150,6 @@ def static_fused_moe(hidden_states, w1, w2, score, topk): w_output = torch.matmul(w_output, w2[expert_idx].transpose(0, 1)) current_hidden_states_static = w_output * padded_weight final_hidden_states += current_hidden_states_static + htorch.core.mark_step() return final_hidden_states.view(-1, D)