Fix LLM special token issue (#895)
* Fix LLM special token issue

Signed-off-by: lvliang-intel <[email protected]>

* update code

Signed-off-by: lvliang-intel <[email protected]>

* update logic

Signed-off-by: lvliang-intel <[email protected]>

* update vllm llm

Signed-off-by: lvliang-intel <[email protected]>

---------

Signed-off-by: lvliang-intel <[email protected]>
Co-authored-by: ZePan110 <[email protected]>
lvliang-intel and ZePan110 authored Nov 14, 2024
1 parent 32bcde4 commit 517a5b0
Showing 3 changed files with 30 additions and 23 deletions.
26 changes: 15 additions & 11 deletions comps/llms/text-generation/tgi/llm.py
@@ -112,11 +112,12 @@ async def stream_generator():
     chat_response = ""
     async for text in text_generation:
         stream_gen_time.append(time.time() - start)
-        chat_response += text
-        chunk_repr = repr(text.encode("utf-8"))
-        if logflag:
-            logger.info(f"[ SearchedDoc ] chunk:{chunk_repr}")
-        yield f"data: {chunk_repr}\n\n"
+        if text not in ["<|im_end|>", "<|endoftext|>"]:
+            chat_response += text
+            chunk_repr = repr(text.encode("utf-8"))
+            if logflag:
+                logger.info(f"[ SearchedDoc ] chunk:{chunk_repr}")
+            yield f"data: {chunk_repr}\n\n"
     if logflag:
         logger.info(f"[ SearchedDoc ] stream response: {chat_response}")
     statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0])
@@ -162,11 +163,12 @@ async def stream_generator():
     chat_response = ""
     async for text in text_generation:
         stream_gen_time.append(time.time() - start)
-        chat_response += text
-        chunk_repr = repr(text.encode("utf-8"))
-        if logflag:
-            logger.info(f"[ LLMParamsDoc ] chunk:{chunk_repr}")
-        yield f"data: {chunk_repr}\n\n"
+        if text not in ["<|im_end|>", "<|endoftext|>"]:
+            chat_response += text
+            chunk_repr = repr(text.encode("utf-8"))
+            if logflag:
+                logger.info(f"[ LLMParamsDoc ] chunk:{chunk_repr}")
+            yield f"data: {chunk_repr}\n\n"
     if logflag:
         logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}")
     statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0])
@@ -271,7 +273,9 @@ def stream_generator():
     for c in chat_completion:
         if logflag:
             logger.info(c)
-        yield f"data: {c.model_dump_json()}\n\n"
+        chunk = c.model_dump_json()
+        if chunk not in ["<|im_end|>", "<|endoftext|>"]:
+            yield f"data: {chunk}\n\n"
     yield "data: [DONE]\n\n"

 return StreamingResponse(stream_generator(), media_type="text/event-stream")
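
For illustration, here is a minimal runnable sketch of the filtering pattern these hunks apply: chunks that are exactly a special token are skipped before being accumulated or emitted. SPECIAL_TOKENS, fake_stream, and main are stand-ins invented for the example, not code from this repository.

import asyncio

SPECIAL_TOKENS = ["<|im_end|>", "<|endoftext|>"]

async def fake_stream():
    # Illustrative stand-in for the TGI async text stream.
    for text in ["Hello", ", world", "<|endoftext|>"]:
        yield text

async def stream_generator():
    chat_response = ""
    async for text in fake_stream():
        if text not in SPECIAL_TOKENS:  # drop chunks that are exactly a special token
            chat_response += text
            yield f"data: {repr(text.encode('utf-8'))}\n\n"
    yield "data: [DONE]\n\n"

async def main():
    async for chunk in stream_generator():
        print(chunk, end="")

asyncio.run(main())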
22 changes: 12 additions & 10 deletions comps/llms/text-generation/vllm/langchain/llm.py
@@ -124,11 +124,12 @@ async def llm_generate(input: Union[LLMParamsDoc, ChatCompletionRequest, SearchedDoc]):
 async def stream_generator():
     chat_response = ""
     async for text in llm.astream(new_input.query, **parameters):
-        chat_response += text
-        chunk_repr = repr(text.encode("utf-8"))
-        if logflag:
-            logger.info(f"[ SearchedDoc ] chunk: {chunk_repr}")
-        yield f"data: {chunk_repr}\n\n"
+        if text not in ["<|im_end|>", "<|endoftext|>"]:
+            chat_response += text
+            chunk_repr = repr(text.encode("utf-8"))
+            if logflag:
+                logger.info(f"[ SearchedDoc ] chunk: {chunk_repr}")
+            yield f"data: {chunk_repr}\n\n"
     if logflag:
         logger.info(f"[ SearchedDoc ] stream response: {chat_response}")
     yield "data: [DONE]\n\n"
@@ -175,11 +176,12 @@ async def stream_generator():
 async def stream_generator():
     chat_response = ""
     async for text in llm.astream(prompt, **parameters):
-        chat_response += text
-        chunk_repr = repr(text.encode("utf-8"))
-        if logflag:
-            logger.info(f"[ LLMParamsDoc ] chunk: {chunk_repr}")
-        yield f"data: {chunk_repr}\n\n"
+        if text not in ["<|im_end|>", "<|endoftext|>"]:
+            chat_response += text
+            chunk_repr = repr(text.encode("utf-8"))
+            if logflag:
+                logger.info(f"[ LLMParamsDoc ] chunk: {chunk_repr}")
+            yield f"data: {chunk_repr}\n\n"
     if logflag:
         logger.info(f"[ LLMParamsDoc ] stream response: {chat_response}")
     yield "data: [DONE]\n\n"
5 changes: 3 additions & 2 deletions comps/llms/text-generation/vllm/llama_index/llm.py
@@ -58,8 +58,9 @@ async def llm_generate(input: LLMParamsDoc):

 async def stream_generator():
     async for text in llm.astream_complete(input.query):
-        output = text.text
-        yield f"data: {output}\n\n"
+        if text.text not in ["<|im_end|>", "<|endoftext|>"]:
+            output = text.text
+            yield f"data: {output}\n\n"
     if logflag:
         logger.info(f"[llm - chat_stream] stream response: {output}")
     yield "data: [DONE]\n\n"
