From 19f9179712f61788e1fb60bee9f6fd09e3b89258 Mon Sep 17 00:00:00 2001 From: Shawn Tan Date: Tue, 24 Sep 2024 04:41:19 +0000 Subject: [PATCH] Moving tests. --- tests/models/test_granitemoe.py | 48 --------------------------------- 1 file changed, 48 deletions(-) delete mode 100644 tests/models/test_granitemoe.py diff --git a/tests/models/test_granitemoe.py b/tests/models/test_granitemoe.py deleted file mode 100644 index 74e6395937058..0000000000000 --- a/tests/models/test_granitemoe.py +++ /dev/null @@ -1,48 +0,0 @@ -"""Compare the outputs of HF and vLLM for Granite models using greedy sampling. - -Run `pytest tests/models/test_granite.py`. -""" -import importlib.metadata - -import pytest - -from .utils import check_logprobs_close - -TRANSFORMERS_VERSION = tuple( - map(int, - importlib.metadata.version("transformers").split("."))) - -MODELS = [ - "ibm/PowerMoE-3b", -] - - -# TODO awaiting huggingface PR acceptance. -# @pytest.mark.skipif(TRANSFORMERS_VERSION < (4, 45), -# reason="granite model test requires transformers >= 4.45") -@pytest.mark.parametrize("model", MODELS) -@pytest.mark.parametrize("dtype", ["bfloat16"]) -@pytest.mark.parametrize("max_tokens", [64]) -@pytest.mark.parametrize("num_logprobs", [5]) -def test_models( - hf_runner, - vllm_runner, - example_prompts, - model: str, - dtype: str, - max_tokens: int, - num_logprobs: int, -) -> None: - with hf_runner(model, dtype=dtype) as hf_model: - hf_outputs = hf_model.generate_greedy_logprobs_limit( - example_prompts, max_tokens, num_logprobs) - - with vllm_runner(model, dtype=dtype) as vllm_model: - vllm_outputs = vllm_model.generate_greedy_logprobs( - example_prompts, max_tokens, num_logprobs) - check_logprobs_close( - outputs_0_lst=hf_outputs, - outputs_1_lst=vllm_outputs, - name_0="hf", - name_1="vllm", - )