fix: better warmup error
OlivierDehaene committed Oct 25, 2023
1 parent f9910d1 · commit 96a982a
Showing 1 changed file with 1 addition and 1 deletion.
server/text_generation_server/models/flash_causal_lm.py (1 addition, 1 deletion)
@@ -670,7 +670,7 @@ def warmup(self, batch: FlashCausalLMBatch):
                 self.device,
             )
             _, batch = self.generate_token(batch)
-        except Exception as e:
+        except torch.cuda.OutOfMemoryError as e:
             raise RuntimeError(
                 f"Not enough memory to handle {len(batch.input_ids)} prefill tokens. "
                 f"You need to decrease `--max-batch-prefill-tokens`"