diff --git a/comps/llms/text-generation/vllm-ray/llm.py b/comps/llms/text-generation/vllm-ray/llm.py index dc0c4b669..6d8abd028 100644 --- a/comps/llms/text-generation/vllm-ray/llm.py +++ b/comps/llms/text-generation/vllm-ray/llm.py @@ -21,18 +21,6 @@ from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice -@traceable(run_type="tool") -def post_process_text(text: str): - if text == " ": - return "data: @#$\n\n" - if text == "\n": - return "data:
\n\n" - if text.isspace(): - return None - new_text = text.replace(" ", "@#$") - return f"data: {new_text}\n\n" - - @register_microservice( name="opea_service@llm_vllm_ray", service_type=ServiceType.LLM, @@ -56,19 +44,13 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: - async def stream_generator(): + def stream_generator(): chat_response = "" - async for text in llm.astream(input.query): + for text in llm.stream(input.query): text = text.content chat_response += text - processed_text = post_process_text(text) - if text and processed_text: - if "" in text: - res = text.split("")[0] - if res != "": - yield res - break - yield processed_text + chunk_repr = repr(text.encode("utf-8")) + yield f"data: {chunk_repr}\n\n" print(f"[llm - chat_stream] stream response: {chat_response}") yield "data: [DONE]\n\n"