From a292886fe763763cee984caad3cfc41846e75a9c Mon Sep 17 00:00:00 2001 From: Lu Fang <30275821+houseroad@users.noreply.github.com> Date: Thu, 2 Jan 2025 16:58:56 -0800 Subject: [PATCH] [Bugfix] Check chain_speculative_sampling before calling it (#11673) Signed-off-by: Lu Fang Signed-off-by: ZincCat --- vllm/model_executor/layers/rejection_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vllm/model_executor/layers/rejection_sampler.py b/vllm/model_executor/layers/rejection_sampler.py index 165e8309fee64..f173cbde03f44 100644 --- a/vllm/model_executor/layers/rejection_sampler.py +++ b/vllm/model_executor/layers/rejection_sampler.py @@ -118,7 +118,7 @@ def forward( # If use Flashinfer chain_speculative_sampling kernel # for rejection sampling - if self.use_flashinfer: + if self.use_flashinfer and chain_speculative_sampling is not None: batch_size, k, _ = draft_probs.shape uniform_samples = self._create_uniform_samples( seeded_seqs, batch_size, k, draft_probs.device)