diff --git a/vllm/config.py b/vllm/config.py index 2513d43ce8e6b..76c10d464aa2c 100644 --- a/vllm/config.py +++ b/vllm/config.py @@ -212,7 +212,7 @@ def _verify_quantization(self) -> None: f"{self.quantization} quantization is currently not " f"supported in ROCm.") if (self.quantization - not in ["marlin", "gptq_marlin_24", "gptq_marlin"]): + not in ("fp8", "marlin", "gptq_marlin_24", "gptq_marlin")): logger.warning( "%s quantization is not fully " "optimized yet. The speed can be slower than "