From 71f21e1ad558c054a1854425ce15ffff46055bcf Mon Sep 17 00:00:00 2001 From: Will Lin Date: Tue, 20 Aug 2024 13:08:49 -0700 Subject: [PATCH 1/2] Raise error if multi-step scheduling is used without AsyncLLMEngine --- vllm/engine/llm_engine.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py index 36cb6ce795f3e..d766cc941ecd9 100644 --- a/vllm/engine/llm_engine.py +++ b/vllm/engine/llm_engine.py @@ -1291,6 +1291,11 @@ def step(self) -> List[Union[RequestOutput, EmbeddingRequestOutput]]: raise NotImplementedError( "Pipeline parallelism is only supported through AsyncLLMEngine " "as performance will be severely degraded otherwise.") + + if self.scheduler_config.num_scheduler_steps > 1: + raise NotImplementedError( + "Multiple scheduler steps (multi-step) are only supported " + "through AsyncLLMEngine. ") seq_group_metadata_list, scheduler_outputs = self.scheduler[ 0].schedule() From ed704acf38aa1873db99fa22cfc12e6b6c7a78c7 Mon Sep 17 00:00:00 2001 From: SolitaryThinker Date: Tue, 20 Aug 2024 16:59:57 -0700 Subject: [PATCH 2/2] Trigger CI