fix vllm llamaindex stream bug (opea-project#679)
Signed-off-by: Xinyao Wang <[email protected]>
XinyaoWa authored Sep 12, 2024
1 parent 6ce6551 commit ca94c60
Showing 1 changed file with 3 additions and 5 deletions.
8 changes: 3 additions & 5 deletions comps/llms/text-generation/vllm/llama_index/llm.py
@@ -57,13 +57,11 @@ def llm_generate(input: LLMParamsDoc):
     if input.streaming:

         def stream_generator():
-            chat_response = ""
             for text in llm.stream_complete(input.query):
-                chat_response += text
-                chunk_repr = repr(text.encode("utf-8"))
-                yield f"data: {chunk_repr}\n\n"
+                output = text.text
+                yield f"data: {output}\n\n"
             if logflag:
-                logger.info(f"[llm - chat_stream] stream response: {chat_response}")
+                logger.info(f"[llm - chat_stream] stream response: {output}")
             yield "data: [DONE]\n\n"

         return StreamingResponse(stream_generator(), media_type="text/event-stream")
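For context on the fix: llama_index's stream_complete() yields CompletionResponse objects rather than plain strings, so the removed lines that concatenated each chunk into chat_response and called .encode() on it broke the stream (a CompletionResponse cannot be concatenated to a str); the patch reads the chunk's .text attribute instead. Below is a minimal, self-contained sketch of the corrected streaming path. It assumes an OpenAILike client pointed at a local vLLM endpoint; the endpoint URL, API key, and model name are placeholders, and the actual microservice constructs its client and StreamingResponse inside llm.py.

# Minimal sketch of the fixed streaming logic, for illustration only.
# Assumes a vLLM server exposing an OpenAI-compatible API at a hypothetical
# http://localhost:8008/v1 endpoint; the real microservice wires up its own
# client and FastAPI response in llm.py.
from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(
    api_key="fake",                       # vLLM does not require a real key by default
    api_base="http://localhost:8008/v1",  # hypothetical vLLM endpoint
    model="meta-llama/Meta-Llama-3-8B-Instruct",  # placeholder model name
    is_chat_model=False,
)

def stream_generator(query: str):
    # stream_complete() yields CompletionResponse objects, not plain strings;
    # the renderable text lives in the .text attribute (as in the fix above).
    for chunk in llm.stream_complete(query):
        yield f"data: {chunk.text}\n\n"
    yield "data: [DONE]\n\n"

if __name__ == "__main__":
    for event in stream_generator("What is deep learning?"):
        print(event, end="")

As in the patch, chunk.text typically carries the accumulated completion so far, while chunk.delta carries only the newly generated piece if incremental output is preferred.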
