forked from vllm-project/vllm
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[Bugfix][Frontend] Fix Issues Under High Load With zeromq Frontend (vllm-project#7394)
Co-authored-by: Nick Hill <[email protected]> Signed-off-by: Alvant <[email protected]>
- Loading branch information
Showing
9 changed files
with
322 additions
and
141 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,55 @@ | ||
""" | ||
This file tests the accuracy of the vLLM server via LMEval. | ||
It uses local-completions, which interacts with vLLM | ||
through the OAI API with N concurrent connections. | ||
This simulates real-world usage of the API and makes | ||
sure that the zmq frontend mp RPC message passing and | ||
AsyncLLMEngine are working correctly. | ||
""" | ||
|
||
import lm_eval | ||
import pytest | ||
|
||
from ...utils import RemoteOpenAIServer | ||
|
||
# Model identifier: passed to RemoteOpenAIServer when launching the server and
# substituted into the `model=` entry of the lm-eval model arguments.
MODEL_NAME = "Qwen/Qwen2-1.5B-Instruct"
# Value substituted into the `num_concurrent=` entry of the lm-eval model
# arguments (the module docstring describes this as N concurrent connections).
NUM_CONCURRENT = 500
# Task name handed to lm_eval.simple_evaluate and used to index its results.
TASK = "gsm8k"
# Key of the metric read from the task's entry in the results.
FILTER = "exact_match,strict-match"
# Allowed deviation of the measured score from EXPECTED_VALUE.
# NOTE: despite the "R" in the name, the test applies this as an absolute
# (additive) tolerance, not a relative one.
RTOL = 0.03
# Score the measured metric is compared against.
EXPECTED_VALUE = 0.58
|
||
|
||
@pytest.fixture(scope="module")
def server():
    """Provide one RemoteOpenAIServer instance shared by the whole module.

    The server is created for MODEL_NAME with a fixed set of command-line
    arguments and yielded from inside its context manager, so the context
    is exited only after the tests in this module are done with it.

    Yields:
        The object returned by entering ``RemoteOpenAIServer(MODEL_NAME, args)``.
    """
    args = [
        "--max-model-len",
        "4096",
        "--enable-chunked-prefill",
        "--disable-log-requests",
        "--enforce-eager",
    ]

    with RemoteOpenAIServer(MODEL_NAME, args) as remote_server:
        yield remote_server
|
||
|
||
@pytest.fixture(scope="module")
def server_data(server):
    """Return the connection details the accuracy test needs.

    Args:
        server: The object provided by the ``server`` fixture; only its
            ``url_for`` method is used here.

    Returns:
        A dict with a single key, ``"url"``, holding the result of
        ``server.url_for('v1')`` with ``/completions`` appended.
    """
    return {
        "url": f"{server.url_for('v1')}/completions",
    }
|
||
|
||
def test_lm_eval_accuracy(server_data):
    """Run TASK through lm-eval against the server and check the score.

    lm-eval's ``local-completions`` model is pointed at the URL from
    ``server_data`` with NUM_CONCURRENT as its ``num_concurrent`` setting.
    The metric named by FILTER must lie strictly within RTOL (applied as an
    absolute tolerance) of EXPECTED_VALUE.

    Args:
        server_data: Dict from the ``server_data`` fixture; its ``"url"``
            entry is used as the ``base_url`` model argument.
    """
    # lm-eval takes its model configuration as one comma-separated string.
    model_args = (f"model={MODEL_NAME},"
                  f"base_url={server_data['url']},"
                  f"num_concurrent={NUM_CONCURRENT},tokenized_requests=False")

    results = lm_eval.simple_evaluate(
        model="local-completions",
        model_args=model_args,
        tasks=TASK,
    )

    measured_value = results["results"][TASK][FILTER]
    # Both bounds are strict: a score exactly RTOL away from the target fails.
    within_tolerance = (measured_value - RTOL < EXPECTED_VALUE
                        and measured_value + RTOL > EXPECTED_VALUE)
    assert within_tolerance, (
        f"Expected: {EXPECTED_VALUE} | Measured: {measured_value}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.