[Bugfix] Fix PP for ChatGLM and Molmo, and weight loading for Qwen2.5-Math-RM #9422

Merged (6 commits) on Oct 24, 2024

Changes from 4 commits
2 changes: 1 addition & 1 deletion docs/source/models/supported_models.rst
@@ -408,7 +408,7 @@ Text Generation
     -
   * - :code:`MolmoForCausalLM`
     - Molmo
-    - Image
+    - T + I
     - :code:`allenai/Molmo-7B-D-0924`, :code:`allenai/Molmo-72B-0924`, etc.
     -
     - ✅︎
35 changes: 17 additions & 18 deletions tests/distributed/test_pipeline_parallel.py
@@ -101,10 +101,8 @@ def iter_params(self, model_name: str):
 # The values displayed here are only a rough indicator of the size of the model

 # yapf: disable
-GENERATION_MODEL_SETTINGS = {
-    # [DETAILED TESTS]
-    "meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
-    # [FAST TESTS]
+TEXT_GENERATION_MODELS = {
+    # [Decoder-only]
     # Uses Llama
     # "BAAI/AquilaChat-7B": PPTestSettings.fast(),
     "Snowflake/snowflake-arctic-instruct": PPTestSettings.fast(tp_base=8, trust_remote_code=True), # noqa: E501
@@ -133,6 +131,7 @@ def iter_params(self, model_name: str):
"core42/jais-13b-chat": PPTestSettings.fast(),
# TODO: Implement PP
# "ai21labs/AI21-Jamba-1.5-Mini": PPTestSettings.fast(),
"meta-llama/Meta-Llama-3-8B": PPTestSettings.detailed(),
"openbmb/MiniCPM-2B-sft-bf16": PPTestSettings.fast(trust_remote_code=True),
"openbmb/MiniCPM3-4B": PPTestSettings.fast(trust_remote_code=True),
# Uses Llama
@@ -158,39 +157,39 @@ def iter_params(self, model_name: str):
"upstage/solar-pro-preview-instruct": PPTestSettings.fast(tp_base=2),
# FIXME: Cannot load tokenizer in latest transformers version
# "xverse/XVERSE-7B-Chat": PPTestSettings.fast(trust_remote_code=True),
# [Encoder-only]
# TODO: Implement PP
# "facebook/bart-base": PPTestSettings.fast(),
}

EMBEDDING_MODEL_SETTINGS = { # type: ignore[var-annotated]
# [FAST TESTS]
EMBEDDING_MODELS = { # type: ignore[var-annotated]
# [Text-only]
"intfloat/e5-mistral-7b-instruct": PPTestSettings.fast(),
"BAAI/bge-multilingual-gemma2": PPTestSettings.fast(),
"Qwen/Qwen2.5-Math-RM-72B": PPTestSettings.fast(tp_base=4, trust_remote_code=True), # noqa: E501
}

MULTIMODAL_MODEL_SETTINGS = {
# [FAST TESTS]
MULTIMODAL_MODELS = {
# [Decoder-only]
"Salesforce/blip2-opt-2.7b": PPTestSettings.fast(),
"facebook/chameleon-7b": PPTestSettings.fast(),
"adept/fuyu-8b": PPTestSettings.fast(),
"THUDM/glm-4v-9b": PPTestSettings.fast(trust_remote_code=True),
"OpenGVLab/InternVL2-1B": PPTestSettings.fast(trust_remote_code=True),
"llava-hf/llava-1.5-7b-hf": PPTestSettings.fast(),
"llava-hf/llava-v1.6-mistral-7b-hf": PPTestSettings.fast(),
"llava-hf/LLaVA-NeXT-Video-7B-hf": PPTestSettings.fast(),
"llava-hf/llava-onevision-qwen2-0.5b-ov-hf": PPTestSettings.fast(),
"openbmb/MiniCPM-Llama3-V-2_5": PPTestSettings.fast(trust_remote_code=True),
# TODO: Implement PP
# "meta-llama/Llama-3.2-11B-Vision-Instruct": PPTestSettings.fast(),
"allenai/Molmo-7B-D-0924": PPTestSettings.fast(trust_remote_code=True),
"microsoft/Phi-3-vision-128k-instruct": PPTestSettings.fast(trust_remote_code=True), # noqa: E501
"mistralai/Pixtral-12B-2409": PPTestSettings.fast(tp_base=2, tokenizer_mode="mistral"), # noqa: E501
"Qwen/Qwen-VL-Chat": PPTestSettings.fast(trust_remote_code=True),
"Qwen/Qwen2-VL-2B-Instruct": PPTestSettings.fast(),
"fixie-ai/ultravox-v0_3": PPTestSettings.fast(),
}

CONDITIONAL_GENERATION_MODEL_SETTINGS = { # type: ignore[var-annotated]
# [FAST TESTS]
# [Encoder-decoder]
# TODO: Implement PP
# "facebook/bart-base": PPTestSettings.fast(),
# "meta-llama/Llama-3.2-11B-Vision-Instruct": PPTestSettings.fast(),
}
# yapf: enable
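For reference, these dicts are what the `@pytest.mark.parametrize` hooks further down iterate over: `PPTestSettings.iter_params` expands each model entry into the `(model_name, parallel_setup, distributed_backend, trust_remote_code, tokenizer_mode)` tuples named in the decorators. Below is a minimal sketch of that expansion, assuming a simplified `PPTestSettings`; the real dataclass in this test file carries additional options (for example eager-mode and chunked-prefill variants), and `pp_base`, the default backend list, and the exact field layout here are illustrative only.

```python
# Simplified sketch only: the names below mirror the test file's helpers but
# are reduced for illustration; they are not the real implementation.
from dataclasses import dataclass, field
from typing import Iterator, List, NamedTuple, Tuple


class ParallelSetup(NamedTuple):
    tp_size: int
    pp_size: int


@dataclass
class PPTestSettings:
    parallel_setups: List[ParallelSetup]
    distributed_backends: List[str] = field(default_factory=lambda: ["mp"])
    trust_remote_code: bool = False
    tokenizer_mode: str = "auto"

    @classmethod
    def fast(cls, *, tp_base: int = 1, pp_base: int = 2,
             trust_remote_code: bool = False,
             tokenizer_mode: str = "auto") -> "PPTestSettings":
        # "fast": a single small (tp, pp) combination per model.
        return cls(parallel_setups=[ParallelSetup(tp_base, pp_base)],
                   trust_remote_code=trust_remote_code,
                   tokenizer_mode=tokenizer_mode)

    @classmethod
    def detailed(cls, *, tp_base: int = 1, pp_base: int = 2,
                 **kwargs) -> "PPTestSettings":
        # "detailed": a larger (tp, pp) matrix, used by one flagship model
        # per settings dict (here meta-llama/Meta-Llama-3-8B).
        return cls(parallel_setups=[ParallelSetup(tp_base, pp_base),
                                    ParallelSetup(tp_base * 2, pp_base)],
                   **kwargs)

    def iter_params(
        self, model_name: str
    ) -> Iterator[Tuple[str, ParallelSetup, str, bool, str]]:
        # Yields the 5-tuples consumed by @pytest.mark.parametrize below:
        # (model_name, parallel_setup, distributed_backend,
        #  trust_remote_code, tokenizer_mode)
        for parallel_setup in self.parallel_setups:
            for backend in self.distributed_backends:
                yield (model_name, parallel_setup, backend,
                       self.trust_remote_code, self.tokenizer_mode)
```

Under this sketch, `fast()` contributes one case per model while `detailed()` fans out over a small matrix, which is why only one flagship model per dict uses it.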

@@ -300,7 +299,7 @@ def _compare_tp(
("model_name", "parallel_setup", "distributed_backend",
"trust_remote_code", "tokenizer_mode"),
[
params for model_name, settings in GENERATION_MODEL_SETTINGS.items()
params for model_name, settings in TEXT_GENERATION_MODELS.items()
for params in settings.iter_params(model_name)
if model_name in TEST_MODELS
],
@@ -327,7 +326,7 @@ def test_tp_language_generation(
("model_name", "parallel_setup", "distributed_backend",
"trust_remote_code", "tokenizer_mode"),
[
params for model_name, settings in EMBEDDING_MODEL_SETTINGS.items()
params for model_name, settings in EMBEDDING_MODELS.items()
for params in settings.iter_params(model_name)
if model_name in TEST_MODELS
],
@@ -354,7 +353,7 @@ def test_tp_language_embedding(
("model_name", "parallel_setup", "distributed_backend",
"trust_remote_code", "tokenizer_mode"),
[
params for model_name, settings in MULTIMODAL_MODEL_SETTINGS.items()
params for model_name, settings in MULTIMODAL_MODELS.items()
for params in settings.iter_params(model_name)
if model_name in TEST_MODELS
],
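As a usage illustration under the same simplified sketch above (not the real helpers), the newly added `Qwen/Qwen2.5-Math-RM-72B` entry in `EMBEDDING_MODELS` expands into a single pipeline-parallel test case:

```python
# Continuing the simplified sketch above; values are illustrative.
settings = PPTestSettings.fast(tp_base=4, trust_remote_code=True)

for params in settings.iter_params("Qwen/Qwen2.5-Math-RM-72B"):
    print(params)
# ('Qwen/Qwen2.5-Math-RM-72B', ParallelSetup(tp_size=4, pp_size=2),
#  'mp', True, 'auto')
```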