From ac4a77798ef2f6d093550a234671511e067625f0 Mon Sep 17 00:00:00 2001
From: lkk <33276950+lkk12014402@users.noreply.github.com>
Date: Sun, 11 Aug 2024 21:41:06 +0800
Subject: [PATCH] align vllm-ray response format to tgi response format (#452)

* align vllm-ray response format to tgi response format

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: changwangss
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 comps/llms/text-generation/vllm-ray/llm.py | 26 ++++----------------------
 1 file changed, 4 insertions(+), 22 deletions(-)

diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py
index dc0c4b669..6d8abd028 100644
--- a/comps/llms/text-generation/vllm-ray/llm.py
+++ b/comps/llms/text-generation/vllm-ray/llm.py
@@ -21,18 +21,6 @@
 from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice
 
 
-@traceable(run_type="tool")
-def post_process_text(text: str):
-    if text == " ":
-        return "data: @#$\n\n"
-    if text == "\n":
-        return "data: <br/>\n\n"
-    if text.isspace():
-        return None
-    new_text = text.replace(" ", "@#$")
-    return f"data: {new_text}\n\n"
-
-
 @register_microservice(
     name="opea_service@llm_vllm_ray",
     service_type=ServiceType.LLM,
@@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc):
     if input.streaming:
 
-        async def stream_generator():
+        def stream_generator():
             chat_response = ""
-            async for text in llm.astream(input.query):
+            for text in llm.stream(input.query):
                 text = text.content
                 chat_response += text
-                processed_text = post_process_text(text)
-                if text and processed_text:
-                    if "</s>" in text:
-                        res = text.split("</s>")[0]
-                        if res != "":
-                            yield res
-                        break
-                    yield processed_text
+                chunk_repr = repr(text.encode("utf-8"))
+                yield f"data: {chunk_repr}\n\n"
             print(f"[llm - chat_stream] stream response: {chat_response}")
             yield "data: [DONE]\n\n"