forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Misc][Quark] Upstream Quark format to VLLM (vllm-project#10765)
Signed-off-by: kewang-xlnx <[email protected]> Signed-off-by: kewang2 <[email protected]> Co-authored-by: kewang2 <[email protected]> Co-authored-by: Michael Goin <[email protected]>
- Loading branch information
Showing
32 changed files
with
1,264 additions
and
70 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
"""Test model set-up and weight loading for quark-quantized models. | ||
Run `pytest tests/quantization/test_quark.py`. | ||
""" | ||
|
||
import torch | ||
|
||
from vllm.model_executor.layers.quantization.quark.quark import ( # noqa: E501 | ||
QuarkLinearMethod, QuarkW8A8Fp8) | ||
|
||
|
||
def test_quark_fp8(vllm_runner):
    """Check set-up and weight loading of a Quark FP8-quantized checkpoint.

    Loads the test checkpoint through the ``vllm_runner`` fixture, inspects
    ``qkv_proj`` of the first entry in ``model.model.layers`` and then runs a
    short greedy generation as a smoke test.

    Args:
        vllm_runner: Fixture that, when called with a model path, yields a
            context-managed runner exposing ``model`` and
            ``generate_greedy``.
    """
    model_path = "amd/Llama-3.1-8B-Instruct-FP8-KV-Quark-test"
    with vllm_runner(model_path) as llm:
        model = llm.model.llm_engine.model_executor.driver_worker.model_runner.model  # noqa: E501
        qkv_proj = model.model.layers[0].self_attn.qkv_proj

        # The layer must be wired to the Quark linear method and must have
        # selected the W8A8 FP8 scheme.
        assert isinstance(qkv_proj.quant_method, QuarkLinearMethod)
        assert isinstance(qkv_proj.scheme, QuarkW8A8Fp8)

        # Both scales are expected to have a zero-dimensional shape.
        assert len(qkv_proj.input_scale.shape) == 0
        assert len(qkv_proj.weight_scale.shape) == 0

        # NOTE(review): an alternative check against torch.float8_e4m3fnuz
        # was previously left commented out here -- presumably for platforms
        # that use the fnuz FP8 variant; confirm whether this assertion
        # should be made platform-dependent.
        assert qkv_proj.weight.dtype is torch.float8_e4m3fn

        # Smoke test: greedy generation must return a non-empty result.
        output = llm.generate_greedy("Hello my name is", max_tokens=20)
        assert output
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
Oops, something went wrong.