Commit

follow up, better tests (vllm-project#128)
horheynm authored Aug 9, 2024
1 parent 7f5443c · commit 1910660
Showing 1 changed file with 7 additions and 7 deletions.
tests/test_quantization/lifecycle/test_apply.py (14 changes: 7 additions & 7 deletions)
@@ -239,10 +239,9 @@ def test_apply_quantization_status(caplog, ignore, should_raise_warning):
     from transformers import AutoModelForCausalLM
 
     # load a dense, unquantized tiny llama model
-    device = "cuda:0"
     model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
     model = AutoModelForCausalLM.from_pretrained(
-        model_name, device_map=device, torch_dtype="auto"
+        model_name, device_map="cpu", torch_dtype="auto"
     )
 
     quantization_config_dict = {
@@ -266,9 +265,10 @@ def test_apply_quantization_status(caplog, ignore, should_raise_warning):
     config = QuantizationConfig(**quantization_config_dict)
     config.quantization_status = QuantizationStatus.CALIBRATION
 
-    if should_raise_warning:
-        # mismatch in the ignore key of quantization_config_dict
-        with caplog.at_level(logging.WARNING):
-            apply_quantization_config(model, config)
-    else:
+    # mismatch in the ignore key of quantization_config_dict
+    with caplog.at_level(logging.WARNING):
+        apply_quantization_config(model, config)
+    if should_raise_warning:
+        assert len(caplog.text) > 0
+    else:
         assert len(caplog.text) == 0
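
For context, a minimal sketch of how the test body reads after this commit. This is an approximation, not the file itself: the parametrize values and the contents of quantization_config_dict are collapsed in the diff above, so the placeholders below are assumptions, and the compressed_tensors.quantization import path is inferred from the names in the hunks.

import logging

import pytest
from transformers import AutoModelForCausalLM

# assumed import path for the helpers the diff exercises
from compressed_tensors.quantization import (
    QuantizationConfig,
    QuantizationStatus,
    apply_quantization_config,
)


@pytest.mark.parametrize(
    "ignore,should_raise_warning",
    [
        (["lm_head"], False),      # hypothetical cases; the real ones
        (["re:.*foobar"], True),   # sit outside the collapsed diff region
    ],
)
def test_apply_quantization_status(caplog, ignore, should_raise_warning):
    # load a dense, unquantized tiny llama model on CPU so the test
    # no longer requires a GPU (the first hunk's change)
    model_name = "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T"
    model = AutoModelForCausalLM.from_pretrained(
        model_name, device_map="cpu", torch_dtype="auto"
    )

    # stand-in for the dict collapsed in the diff; the real test builds
    # config_groups here and includes an "ignore": ignore entry
    quantization_config_dict = {
        "config_groups": {
            "group_0": {
                "weights": {"num_bits": 8, "type": "int", "symmetric": True},
                "targets": ["Linear"],
            }
        },
        "ignore": ignore,
    }

    config = QuantizationConfig(**quantization_config_dict)
    config.quantization_status = QuantizationStatus.CALIBRATION

    # apply the config unconditionally under caplog, then branch only on
    # the assertion; previously only one branch ran under caplog and the
    # warning case never asserted that a warning was actually logged
    with caplog.at_level(logging.WARNING):
        apply_quantization_config(model, config)
    if should_raise_warning:
        assert len(caplog.text) > 0
    else:
        assert len(caplog.text) == 0

The point of the second hunk is that both parametrized cases now exercise the same apply_quantization_config call path, and the no-warning case positively asserts an empty caplog instead of silently skipping the capture.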
