diff --git a/lazyllm/components/prompter/builtinPrompt.py b/lazyllm/components/prompter/builtinPrompt.py
index c996a424..a56d565a 100644
--- a/lazyllm/components/prompter/builtinPrompt.py
+++ b/lazyllm/components/prompter/builtinPrompt.py
@@ -176,7 +176,7 @@ def _split_instruction(self, instruction: str):
         user_instruction = ""
         if LazyLLMPrompterBase.ISA in instruction and LazyLLMPrompterBase.ISE in instruction:
             # The instruction includes system prompts and/or user prompts
-            pattern = re.compile(r"%s(.*)%s" % (LazyLLMPrompterBase.ISA, LazyLLMPrompterBase.ISE))
+            pattern = re.compile(r"%s(.*)%s" % (LazyLLMPrompterBase.ISA, LazyLLMPrompterBase.ISE), re.DOTALL)
             ret = re.split(pattern, instruction)
             system_instruction = ret[0]
             user_instruction = ret[1]
diff --git a/lazyllm/components/prompter/prompter.py b/lazyllm/components/prompter/prompter.py
index 8e9031de..0580c437 100644
--- a/lazyllm/components/prompter/prompter.py
+++ b/lazyllm/components/prompter/prompter.py
@@ -1,6 +1,7 @@
 import re
 import json
 import collections
+from lazyllm import LOG
 
 templates = dict(
     # Template used by Alpaca-LoRA.
@@ -75,7 +76,7 @@ def generate_prompt(self, input, history=None, tools=None, label=None, show=Fals
             raise RuntimeError(f'Generate prompt failed, and prompt is {self._prompt}; chat-prompt'
                                f' is {self._chat_prompt}; input is {input}; history is {history}')
         if label: input += label
-        if self._show or show: print(input)
+        if self._show or show: LOG.info(input)
         return input
 
     def get_response(self, response, input=None):
diff --git a/lazyllm/engine/engine.py b/lazyllm/engine/engine.py
index 5fbc7c58..60d44eee 100644
--- a/lazyllm/engine/engine.py
+++ b/lazyllm/engine/engine.py
@@ -37,7 +37,7 @@ def start(self, nodes: Dict[str, Any]) -> None:
 
     @overload
     def start(self, nodes: List[Dict] = [], edges: List[Dict] = [], resources: List[Dict] = [],
-              gid: Optional[str] = None, name: Optional[str] = None) -> str:
+              gid: Optional[str] = None, name: Optional[str] = None, _history_ids: Optional[List[str]] = None) -> str:
         ...
 
     @overload
@@ -142,9 +142,10 @@ def _process_hook(self, node, module):
 
 
 class ServerGraph(lazyllm.ModuleBase):
-    def __init__(self, g: lazyllm.graph, server: Node, web: Node):
+    def __init__(self, g: lazyllm.graph, server: Node, web: Node, _history_ids: Optional[List[str]] = None):
         super().__init__()
         self._g = lazyllm.ActionModule(g)
+        self._history_ids = _history_ids
         if server:
             if server.args.get('port'): raise NotImplementedError('Port is not supported now')
             self._g = lazyllm.ServerModule(g)
@@ -205,7 +206,7 @@ def make_server_resource(kind: str, graph: ServerGraph, args: Dict[str, Any]):
 
 @NodeConstructor.register('Graph', 'SubGraph', subitems=['nodes', 'resources'])
 def make_graph(nodes: List[dict], edges: List[Union[List[str], dict]] = [],
-               resources: List[dict] = [], enable_server=True):
+               resources: List[dict] = [], enable_server: bool = True, _history_ids: Optional[List[str]] = None):
     engine = Engine()
     server_resources = dict(server=None, web=None)
     for resource in resources:
@@ -238,7 +239,7 @@ def make_graph(nodes: List[dict], edges: List[Union[List[str], dict]] = [],
         else:
             g.add_edge(engine._nodes[edge['iid']].name, engine._nodes[edge['oid']].name, formatter)
 
-    sg = ServerGraph(g, server_resources['server'], server_resources['web'])
+    sg = ServerGraph(g, server_resources['server'], server_resources['web'], _history_ids=_history_ids)
     for kind, node in server_resources.items():
         if node: node.args = dict(kind=kind, graph=sg, args=node.args)
 
@@ -430,6 +431,14 @@ def share(self, prompt: str):
     def forward(self, *args, **kw):
         return self.vqa(*args, **kw)
 
+    @property
+    def stream(self):
+        return self._vqa._stream
+
+    @stream.setter
+    def stream(self, v: bool):
+        self._vqa._stream = v
+
 
 @NodeConstructor.register('VQA')
 def make_vqa(base_model: str, file_resource_id: Optional[str] = None):
@@ -437,10 +446,13 @@ def make_vqa(base_model: str, file_resource_id: Optional[str] = None):
 
 
 @NodeConstructor.register('SharedLLM')
-def make_shared_llm(llm: str, prompt: Optional[str] = None, file_resource_id: Optional[str] = None):
+def make_shared_llm(llm: str, prompt: Optional[str] = None, stream: Optional[bool] = None,
+                    file_resource_id: Optional[str] = None):
     llm = Engine().build_node(llm).func
     if file_resource_id: assert isinstance(llm, VQA), 'file_resource_id is only supported in VQA'
-    return VQA(llm._vqa.share(prompt=prompt), file_resource_id) if file_resource_id else llm.share(prompt=prompt)
+    r = VQA(llm._vqa.share(prompt=prompt), file_resource_id) if file_resource_id else llm.share(prompt=prompt)
+    if stream is not None: r.stream = stream
+    return r
 
 
 @NodeConstructor.register('STT')
diff --git a/lazyllm/engine/lightengine.py b/lazyllm/engine/lightengine.py
index 3f713cc1..15b50b66 100644
--- a/lazyllm/engine/lightengine.py
+++ b/lazyllm/engine/lightengine.py
@@ -1,4 +1,4 @@
-from .engine import Engine, Node
+from .engine import Engine, Node, ServerGraph
 import lazyllm
 from lazyllm import once_wrapper
 from typing import List, Dict, Optional, Set, Union
@@ -56,7 +56,7 @@ def update_node(self, node):
             self._nodes[node.id] = super(__class__, self).build_node(node)
         return self._nodes[node.id]
 
-    def start(self, nodes, edges=[], resources=[], gid=None, name=None):
+    def start(self, nodes, edges=[], resources=[], gid=None, name=None, _history_ids=None):
         if isinstance(nodes, str):
             assert not edges and not resources and not gid and not name
             self.build_node(nodes).func.start()
@@ -65,7 +65,8 @@ def start(self, nodes, edges=[], resources=[], gid=None, name=None):
         else:
             gid, name = gid or str(uuid.uuid4().hex), name or str(uuid.uuid4().hex)
             node = Node(id=gid, kind='Graph', name=name, args=dict(
-                nodes=copy.copy(nodes), edges=copy.copy(edges), resources=copy.copy(resources)))
+                nodes=copy.copy(nodes), edges=copy.copy(edges),
+                resources=copy.copy(resources), _history_ids=_history_ids))
             with set_resources(resources):
                 self.build_node(node).func.start()
             return gid
@@ -106,12 +107,21 @@ def update(self, gid_or_nodes: Union[str, Dict, List[Dict]], nodes: List[Dict],
 
             for node in gid_or_nodes: self.update_node(node)
 
     def run(self, id: str, *args, _lazyllm_files: Optional[Union[str, List[str]]] = None,
-            _file_resources: Optional[Dict[str, Union[str, List[str]]]] = None, **kw):
+            _file_resources: Optional[Dict[str, Union[str, List[str]]]] = None,
+            _lazyllm_history: Optional[List[List[str]]] = None, **kw):
         if files := _lazyllm_files:
             assert len(args) <= 1 and len(kw) == 0, 'At most one query is enabled when file exists'
             args = [lazyllm.formatter.file(formatter='encode')(dict(query=args[0] if args else '', files=files))]
         if _file_resources: lazyllm.globals['lazyllm_files'] = _file_resources
+        f = self.build_node(id).func
+        lazyllm.FileSystemQueue().dequeue()
+        if history := _lazyllm_history:
+            assert isinstance(f, ServerGraph) and (ids := f._history_ids), 'Only graphs support history'
+            if not (isinstance(history, list) and all([isinstance(h, list) for h in history])):
+                raise RuntimeError('History should be [[str, str], ..., [str, str]] (list of lists of str)')
+            lazyllm.globals['chat_history'] = {Engine().build_node(i).func._module_id: history for i in ids}
         result = self.build_node(id).func(*args, **kw)
         lazyllm.globals['lazyllm_files'] = {}
+        lazyllm.globals['chat_history'] = {}
         return result
diff --git a/lazyllm/engine/node.py b/lazyllm/engine/node.py
index 0ab31eae..25837f0f 100644
--- a/lazyllm/engine/node.py
+++ b/lazyllm/engine/node.py
@@ -51,7 +51,7 @@ class NodeArgs(object):
     init_arguments=dict(
         base_model=NodeArgs(str),
         target_path=NodeArgs(str),
-        stream=NodeArgs(bool, True),
+        stream=NodeArgs(bool, False),
         return_trace=NodeArgs(bool, False)),
     builder_argument=dict(
         trainset=NodeArgs(str),
@@ -67,7 +67,7 @@ class NodeArgs(object):
         base_url=NodeArgs(str),
         api_key=NodeArgs(str, None),
         secret_key=NodeArgs(str, None),
-        stream=NodeArgs(bool, True),
+        stream=NodeArgs(bool, False),
         return_trace=NodeArgs(bool, False)),
     builder_argument=dict(
         prompt=NodeArgs(str)),
diff --git a/lazyllm/module/module.py b/lazyllm/module/module.py
index cddc6ad8..64154268 100644
--- a/lazyllm/module/module.py
+++ b/lazyllm/module/module.py
@@ -707,6 +707,7 @@ def __init__(self, base_model: Option = '', target_path='', *, stream=False, ret
                                            None, lazyllm.finetune.auto, lazyllm.deploy.auto)
         self._impl._add_father(self)
         self.prompt()
+        self._stream = stream
 
     base_model = property(lambda self: self._impl._base_model)
     target_path = property(lambda self: self._impl._target_path)
@@ -720,6 +721,14 @@ def series(self):
 
     def type(self):
         return ModelManager.get_model_type(self.base_model).upper()
 
+    @property
+    def stream(self):
+        return self._stream
+
+    @stream.setter
+    def stream(self, v: bool):
+        self._stream = v
+
     def get_all_finetuned_models(self):
         return self._impl._get_all_finetuned_models()
diff --git a/lazyllm/module/onlineChatModule/onlineChatModuleBase.py b/lazyllm/module/onlineChatModule/onlineChatModuleBase.py
index f651412e..98b639cb 100644
--- a/lazyllm/module/onlineChatModule/onlineChatModuleBase.py
+++ b/lazyllm/module/onlineChatModule/onlineChatModuleBase.py
@@ -48,6 +48,14 @@ def series(self):
     def type(self):
         return "LLM"
 
+    @property
+    def stream(self):
+        return self._stream
+
+    @stream.setter
+    def stream(self, v: bool):
+        self._stream = v
+
     def prompt(self, prompt=None):
         if prompt is None:
             self._prompt = ChatPrompter()
diff --git a/tests/advanced_tests/standard_test/test_engine.py b/tests/advanced_tests/standard_test/test_engine.py
index a189e31b..aefd9e20 100644
--- a/tests/advanced_tests/standard_test/test_engine.py
+++ b/tests/advanced_tests/standard_test/test_engine.py
@@ -92,3 +92,38 @@ def test_multimedia(self):
 
         r = engine.run(gid, '生成音乐,长笛独奏,大自然之声。')
         assert '.wav' in r
+
+    def test_stream_and_history(self):
+        resources = [dict(id='0', kind='LocalLLM', name='base', args=dict(base_model='internlm2-chat-7b'))]
+        nodes = [dict(id='1', kind='SharedLLM', name='m1', args=dict(llm='0', stream=True, prompt=dict(
+                     system='请将我的问题翻译成中文。请注意,请直接输出翻译后的问题,不要反问和发挥',
+                     user='问题: {query} \n, 翻译:'))),
+                 dict(id='2', kind='SharedLLM', name='m2',
+                      args=dict(llm='0', stream=True,
+                                prompt=dict(system='请参考历史对话,回答问题,并保持格式不变。', user='{query}'))),
+                 dict(id='3', kind='JoinFormatter', name='join', args=dict(type='to_dict', names=['query', 'answer'])),
+                 dict(id='4', kind='SharedLLM', stream=False, name='m3',
+                      args=dict(llm='0', prompt=dict(system='你是一个问答机器人,会根据用户的问题作出回答。',
+                                                     user='请结合历史对话和本轮的问题,总结我们的全部对话。本轮情况如下:\n {query}, 回答: {answer}')))]
+        engine = LightEngine()
+        gid = engine.start(nodes, edges=[['__start__', '1'], ['1', '2'], ['1', '3'], ['2', '3'], ['3', '4'],
+                                         ['4', '__end__']], resources=resources, _history_ids=['2', '4'])
+        history = [['水的沸点是多少?', '您好,我的答案是:水的沸点在标准大气压下是100摄氏度。'],
+                   ['世界上最大的动物是什么?', '您好,我的答案是:蓝鲸是世界上最大的动物。'],
+                   ['人一天需要喝多少水?', '您好,我的答案是:一般建议每天喝8杯水,大约2升。'],
+                   ['雨后为什么会有彩虹?', '您好,我的答案是:雨后阳光通过水滴发生折射和反射形成了彩虹。'],
+                   ['月亮会发光吗?', '您好,我的答案是:月亮本身不会发光,它反射太阳光。'],
+                   ['一年有多少天', '您好,我的答案是:一年有365天,闰年有366天。']]
+
+        stream_result = ''
+        with lazyllm.ThreadPoolExecutor(1) as executor:
+            future = executor.submit(engine.run, gid, 'How many hours are there in a day?', _lazyllm_history=history)
+            while True:
+                if value := lazyllm.FileSystemQueue().dequeue():
+                    stream_result += f"{''.join(value)}"
+                elif future.done():
+                    break
+            result = future.result()
+        assert '一天' in stream_result and '小时' in stream_result
+        assert '您好,我的答案是' in stream_result and '24' in stream_result
+        assert '蓝鲸' in result and '水' in result
diff --git a/tests/charge_tests/test_engine.py b/tests/charge_tests/test_engine.py
index 7f745b87..26892784 100644
--- a/tests/charge_tests/test_engine.py
+++ b/tests/charge_tests/test_engine.py
@@ -222,3 +222,37 @@ def test_register_tools(self):
         unit = 'Celsius'
         ret = engine.run(gid, f"What is the temperature in {city_name} today in {unit}?")
         assert city_name in ret and unit in ret and '10' in ret
+
+    def test_stream_and_history(self):
+        nodes = [dict(id='1', kind='OnlineLLM', name='m1', args=dict(source='glm', stream=True, prompt=dict(
+                     system='请将我的问题翻译成中文。请注意,请直接输出翻译后的问题,不要反问和发挥',
+                     user='问题: {query} \n, 翻译:'))),
+                 dict(id='2', kind='OnlineLLM', name='m2',
+                      args=dict(source='glm', stream=True,
+                                prompt=dict(system='请参考历史对话,回答问题,并保持格式不变。', user='{query}'))),
+                 dict(id='3', kind='JoinFormatter', name='join', args=dict(type='to_dict', names=['query', 'answer'])),
+                 dict(id='4', kind='OnlineLLM', stream=False, name='m3', args=dict(source='glm', prompt=dict(
+                     system='你是一个问答机器人,会根据用户的问题作出回答。',
+                     user='请结合历史对话和本轮的问题,总结我们的全部对话。本轮情况如下:\n {query}, 回答: {answer}')))]
+        engine = LightEngine()
+        gid = engine.start(nodes, edges=[['__start__', '1'], ['1', '2'], ['1', '3'], ['2', '3'],
+                                         ['3', '4'], ['4', '__end__']], _history_ids=['2', '4'])
+        history = [['水的沸点是多少?', '您好,我的答案是:水的沸点在标准大气压下是100摄氏度。'],
+                   ['世界上最大的动物是什么?', '您好,我的答案是:蓝鲸是世界上最大的动物。'],
+                   ['人一天需要喝多少水?', '您好,我的答案是:一般建议每天喝8杯水,大约2升。'],
+                   ['雨后为什么会有彩虹?', '您好,我的答案是:雨后阳光通过水滴发生折射和反射形成了彩虹。'],
+                   ['月亮会发光吗?', '您好,我的答案是:月亮本身不会发光,它反射太阳光。'],
+                   ['一年有多少天', '您好,我的答案是:一年有365天,闰年有366天。']]
+
+        stream_result = ''
+        with lazyllm.ThreadPoolExecutor(1) as executor:
+            future = executor.submit(engine.run, gid, 'How many hours are there in a day?', _lazyllm_history=history)
+            while True:
+                if value := lazyllm.FileSystemQueue().dequeue():
+                    stream_result += f"{''.join(value)}"
+                elif future.done():
+                    break
+            result = future.result()
+        assert '一天' in stream_result and '小时' in stream_result
+        assert '您好,我的答案是' in stream_result and '24' in stream_result
+        assert '蓝鲸' in result and '水' in result
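
Taken together, the changes above add per-node stream control (stream now defaults to False and can be enabled per SharedLLM/OnlineLLM node or through the new stream property) and conversation-history injection (nodes named in _history_ids at engine.start receive the turns passed to engine.run as _lazyllm_history). Below is a minimal usage sketch condensed from the tests in this diff; the base model, node ids, prompts and the LightEngine import path are illustrative assumptions, not part of the patch.

# Minimal sketch of the API exercised by the tests above (assumptions noted in the note).
import lazyllm
from lazyllm.engine import LightEngine  # assumed import path

resources = [dict(id='0', kind='LocalLLM', name='base',
                  args=dict(base_model='internlm2-chat-7b'))]  # placeholder model
nodes = [dict(id='1', kind='SharedLLM', name='chat',
              args=dict(llm='0', stream=True,  # stream defaults to False; enable it per node
                        prompt=dict(system='Answer with the help of the chat history.', user='{query}')))]

engine = LightEngine()
# Nodes listed in _history_ids receive the history passed to run().
gid = engine.start(nodes, edges=[['__start__', '1'], ['1', '__end__']],
                   resources=resources, _history_ids=['1'])

history = [['previous question', 'previous answer']]  # [[user, assistant], ...]
streamed = ''
with lazyllm.ThreadPoolExecutor(1) as executor:
    future = executor.submit(engine.run, gid, 'a follow-up question', _lazyllm_history=history)
    while True:
        # Chunks produced by stream=True nodes are drained through the file-system queue.
        if chunk := lazyllm.FileSystemQueue().dequeue():
            streamed += ''.join(chunk)
        elif future.done():
            break
    answer = future.result()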