diff --git a/vllm/model_executor/layers/rejection_sampler.py b/vllm/model_executor/layers/rejection_sampler.py index 165e8309fee64..f173cbde03f44 100644 --- a/vllm/model_executor/layers/rejection_sampler.py +++ b/vllm/model_executor/layers/rejection_sampler.py @@ -118,7 +118,7 @@ def forward( # If use Flashinfer chain_speculative_sampling kernel # for rejection sampling - if self.use_flashinfer: + if self.use_flashinfer and chain_speculative_sampling is not None: batch_size, k, _ = draft_probs.shape uniform_samples = self._create_uniform_samples( seeded_seqs, batch_size, k, draft_probs.device)