Online chat formatter #8

Merged Jun 20, 2024 · 23 commits
Changes from 16 commits
16 changes: 8 additions & 8 deletions README.ENG.md

@@ -93,9 +93,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()
 
 ```python
 import lazyllm
-from lazyllm import pipeline, parallel, Identity, warp, package
+from lazyllm import pipeline, warp, package, bind
 import time
-import re, json
+from lazyllm.components.formatter import JsonFormatter
+
 
 toc_prompt="""
 You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.
@@ -134,17 +135,16 @@ This is the expanded content for writing.
 Receive as follows:
 
 """
 
+writer_prompt = {"system": completion_prompt, "user": '{"title": {title}, "describe": {describe}}'}
 ```
 </details>
 
 ```python
-t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
-t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
-
-spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
-writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
-collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
-m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
+with pipeline() as m:
+    m.m1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
+    m.m2 = warp(lazyllm.OnlineChatModule(source="openai", stream=False).prompt(writer_prompt))
+    m.m3 = (lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])) | bind(m.m1, m.m2)
 
 print(m({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
 ```
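A quick aside on what the new formatter buys in this pipeline (a minimal sketch, not part of the PR; the reply text is invented for illustration): `JsonFormatter` turns the raw model reply of `m1` into Python data, which is what `warp` fans out over in `m2`.

```python
from lazyllm.components.formatter import JsonFormatter

# A model reply that embeds a JSON object in surrounding prose.
raw_reply = 'Sure! {"title": "# AI in Medicine", "describe": "Overview of clinical use cases."}'

# JsonFormatter extracts and parses the embedded object; a single object
# comes back as a dict, several objects come back as a list of dicts.
print(JsonFormatter().format(raw_reply))
# -> {'title': '# AI in Medicine', 'describe': 'Overview of clinical use cases.'}
```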
16 changes: 8 additions & 8 deletions README.md

@@ -90,9 +90,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()
 
 ```python
 import lazyllm
-from lazyllm import pipeline, parallel, Identity, warp, package
+from lazyllm import pipeline, warp, package, bind
 import time
-import re, json
+from lazyllm.components.formatter import JsonFormatter
+
 
 toc_prompt="""
 You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.
@@ -129,17 +130,16 @@ completion_prompt="""
 Receive as follows:
 
 """
 
+writer_prompt = {"system": completion_prompt, "user": '{"title": {title}, "describe": {describe}}'}
 ```
 </details>
 
 ```python
-t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
-t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
-
-spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
-writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
-collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
-m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
+with pipeline() as m:
+    m.m1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
+    m.m2 = warp(lazyllm.OnlineChatModule(source="openai", stream=False).prompt(writer_prompt))
+    m.m3 = (lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])) | bind(m.m1, m.m2)
 
 print(m({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
 ```
15 changes: 15 additions & 0 deletions docs/source/api/components.rst

@@ -60,3 +60,18 @@ ModelDownloader
 .. autoclass:: lazyllm.components.ModelDownloader
     :members:
     :exclude-members:
+
+Formatter
+==========
+
+.. autoclass:: lazyllm.components.formatter.LazyLLMFormatterBase
+    :members:
+    :exclude-members:
+
+.. autoclass:: lazyllm.components.JsonFormatter
+    :members:
+    :exclude-members:
+
+.. autoclass:: lazyllm.components.EmptyFormatter
+    :members:
+    :exclude-members:
4 changes: 3 additions & 1 deletion lazyllm/__init__.py

@@ -7,7 +7,8 @@
                     Loop as loop, Switch as switch, IFS as ifs, Warp as warp)
 from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
                          LazyLLMValidateBase, register as component_register, Prompter,
-                         AlpacaPrompter, ChatPrompter, FastapiApp)
+                         AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)
+
 from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
                      ServerModule, TrialModule, register as module_register,
                      OnlineChatModule, OnlineEmbeddingModule)
@@ -33,6 +34,7 @@
     'AlpacaPrompter',
     'ChatPrompter',
     'FastapiApp',
+    'JsonFormatter',
 
     # flow
     'LazyLLMFlowsBase', # pipeline, parallel
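As a consequence of this re-export, the formatter is reachable from the package root. A small sanity check (not part of the PR):

```python
import lazyllm
from lazyllm.components.formatter import JsonFormatter

# Both import paths should resolve to the same class after this change.
assert lazyllm.JsonFormatter is JsonFormatter
```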
6 changes: 3 additions & 3 deletions lazyllm/common/common.py

@@ -334,19 +334,19 @@ class LazyLlmRequest(struct):
 
     def split(self, flag=None):
         if flag is None:
-            assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
+            assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
                 f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
             return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
         elif isinstance(flag, int):
-            assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
+            assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
                 f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
             assert flag == len(self.input), 'input size mismatch with split number'
             return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
         elif isinstance(flag, list):
             if isinstance(self.input, dict):
                 assert len(self.kwargs) == 0, 'Cannot provide input and kwargs at the same time for split'
                 d = self.input
-            elif isinstance(self.input, tuple):
+            elif isinstance(self.input, (tuple, list)):
                 return self.split(len(flag))
             else:
                 assert not self.input, 'Cannot provide input and kwargs at the same time for split'
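A hedged illustration of the relaxed check (the import path for this internal class is assumed, not confirmed by the PR):

```python
from lazyllm.common.common import LazyLlmRequest  # assumed module path

# Before this change only tuple inputs could be split; lists now work too.
req = LazyLlmRequest(input=[1, 2, 3])
parts = req.split()   # -> three LazyLlmRequest objects, one element each
parts = req.split(3)  # same, with an explicit size check
```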
2 changes: 1 addition & 1 deletion lazyllm/common/logger.py

@@ -18,7 +18,7 @@
     "log_format",
     str,
     "{process}: <green>{time:YYYY-MM-DD HH:mm:ss}</green> {extra[name]} "
-    "<level>{level}</level>: ({name}) <cyan>{message}</cyan>",
+    "<level>{level}</level>: ({name}:{line}) <cyan>{message}</cyan>",
     "LOG_FORMAT",
 )
 lazyllm.config.add("log_dir", str, "~/.lazyllm", "LOG_DIR")
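The format string is a loguru template; the added `:{line}` field makes every record carry its source line number. A standalone sketch of what that renders (names and output are illustrative):

```python
import sys
from loguru import logger

fmt = ("{process}: <green>{time:YYYY-MM-DD HH:mm:ss}</green> {extra[name]} "
       "<level>{level}</level>: ({name}:{line}) <cyan>{message}</cyan>")
logger.remove()                         # drop the default sink
logger.add(sys.stderr, format=fmt)      # install the new format
logger.bind(name="lazyllm").info("hi")  # e.g. "1234: 2024-06-20 10:00:00 lazyllm INFO: (__main__:9) hi"
```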
6 changes: 5 additions & 1 deletion lazyllm/components/__init__.py

@@ -6,6 +6,7 @@
 from .validate import LazyLLMValidateBase
 from .auto import AutoDeploy, AutoFinetune
 from .utils import ModelDownloader
+from .formatter import FormatterBase, EmptyFormatter, JsonFormatter
 
 __all__ = [
     'register',
@@ -19,5 +20,8 @@
     'FastapiApp',
     'AutoDeploy',
     'AutoFinetune',
-    'ModelDownloader'
+    'ModelDownloader',
+    'FormatterBase',
+    'EmptyFormatter',
+    'JsonFormatter'
 ]
10 changes: 10 additions & 0 deletions lazyllm/components/formatter/__init__.py

@@ -0,0 +1,10 @@
from .formatterBase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
from .jsonFormatter import JsonFormatter


__all__ = [
    'LazyLLMFormatterBase',
    'FormatterBase',
    'EmptyFormatter',
    'JsonFormatter'
]
50 changes: 50 additions & 0 deletions lazyllm/components/formatter/formatterBase.py

@@ -0,0 +1,50 @@
from ...common import LazyLLMRegisterMetaClass

def is_number(s: str):
    try:
        int(s)
        return True
    except ValueError:
        if s == "None" or len(s) == 0:
            return False
        else:
            raise ValueError("Invalid number: " + s + ". You can enter an integer, None or an empty string.")

class LazyLLMFormatterBase(metaclass=LazyLLMRegisterMetaClass):
    def __init__(self, formatter: str = None):
        self._formatter = formatter
        if self._formatter:
            self._parse_formatter()
        else:
            self._slices = None

    def _parse_formatter(self):
        # Remove the surrounding brackets
        slice_str = self._formatter.strip()[1:-1]
        dimensions = slice_str.split(",")
        slices = []

        for dim in dimensions:
            if ":" in dim:
                parts = dim.split(":")
                start = int(parts[0]) if is_number(parts[0]) else None
                end = int(parts[1]) if len(parts) > 1 and is_number(parts[1]) else None
                step = int(parts[2]) if len(parts) > 2 and is_number(parts[2]) else None
                slices.append(slice(start, end, step))
            else:
                slices.append(dim.strip())
        self._slices = slices

    def _load(self, msg: str):
        raise NotImplementedError("This str parsing function is not implemented.")

    def _parse_py_data_by_formatter(self, py_data):
        raise NotImplementedError("This data parsing function is not implemented.")

    def format(self, msg):
        if isinstance(msg, str): msg = self._load(msg)
        return self._parse_py_data_by_formatter(msg)

class EmptyFormatter(LazyLLMFormatterBase):
    def format(self, msg):
        return msg
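The formatter string is a bracketed, comma-separated list of Python-slice-like dimensions and plain dict keys. A minimal sketch of how it parses (the trivial subclass exists only because the base class keeps `_load`/`_parse_py_data_by_formatter` abstract):

```python
from lazyllm.components.formatter import LazyLLMFormatterBase

class EchoFormatter(LazyLLMFormatterBase):
    def _parse_py_data_by_formatter(self, py_data):
        return py_data

# "0:2" becomes a slice object; "title" stays a dict key.
print(EchoFormatter("[0:2, title]")._slices)  # -> [slice(0, 2, None), 'title']
```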
57 changes: 57 additions & 0 deletions lazyllm/components/formatter/jsonFormatter.py

@@ -0,0 +1,57 @@
import json
from .formatterBase import LazyLLMFormatterBase as FormatterBase
import lazyllm

class JsonFormatter(FormatterBase):
    def _extract_json_from_string(self, mixed_str: str):
        json_objects = []
        brace_level = 0
        current_json = ""
        in_string = False

        for char in mixed_str:
            if char == '"' and (len(current_json) == 0 or current_json[-1] != '\\'):
                in_string = not in_string

            if not in_string:
                if char == '{':
                    if brace_level == 0:
                        current_json = ""
                    brace_level += 1
                elif char == '}':
                    brace_level -= 1

            if brace_level > 0 or (brace_level == 0 and char == '}'):
                current_json += char

            if brace_level == 0 and current_json:
                try:
                    json.loads(current_json)
                    json_objects.append(current_json)
                    current_json = ""
                except json.JSONDecodeError:
                    continue

        return json_objects

    def _load(self, msg: str):
        # Convert str to json format
        assert msg.count("{") == msg.count("}"), f"{msg} is not a valid json string."
        try:
            json_strs = self._extract_json_from_string(msg)
            if len(json_strs) == 0:
                raise TypeError(f"{msg} is not a valid json string.")
            res = []
            for json_str in json_strs:
                res.append(json.loads(json_str))
            return res if len(res) > 1 else res[0]
        except Exception as e:
            lazyllm.LOG.info(f"Error: {e}")
            return ""

    def _parse_py_data_by_formatter(self, data, *, slices=None):
        if slices is None: slices = self._slices
        if not slices: return data
        if isinstance(slices[0], slice):
            return [self._parse_py_data_by_formatter(d, slices=slices[1:]) for d in data[slices[0]]]
        else:
            return self._parse_py_data_by_formatter(data[slices[0]], slices=slices[1:])
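A usage sketch combining extraction with the base class's slice grammar (the reply text is invented for illustration):

```python
from lazyllm.components.formatter import JsonFormatter

mixed = 'Outline: {"title": "# Intro", "describe": "..."} then {"title": "## Body", "describe": "..."}'

# Two embedded objects -> a list of dicts.
print(JsonFormatter().format(mixed))

# "[:, title]" means: every element, then its "title" key.
print(JsonFormatter("[:, title]").format(mixed))  # -> ['# Intro', '## Body']
```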
11 changes: 8 additions & 3 deletions lazyllm/components/prompter/alpacaPrompter.py

@@ -1,15 +1,20 @@
-from typing import List, Union, Optional
+from typing import List, Union, Optional, Dict
 from .builtinPrompt import LazyLLMPrompterBase
 
 class AlpacaPrompter(LazyLLMPrompterBase):
-    def __init__(self, instruction: Union[None, str] = None,
+    def __init__(self, instruction: Union[None, str, Dict[str, str]] = None,
                  extro_keys: Union[None, List[str]] = None, show: bool = False, tools: Optional[List] = None):
         super(__class__, self).__init__(show, tools=tools)
+        if isinstance(instruction, dict):
+            splice_instruction = instruction.get("system", "") + self._isa + instruction.get("user", "") + self._ise
+            instruction = splice_instruction
         instruction_template = ("Below is an instruction that describes a task, paired with extra messages such as "
                                 "input that provides further context if possible. Write a response that "
                                 f"appropriately completes the request.\n\n ### Instruction:\n{instruction}"
                                 "\n\n" + LazyLLMPrompterBase._get_extro_key_template(extro_keys))
-        self._init_prompt("{system}\n{instruction}\n{tools}### Response:\n", instruction_template, "### Response:")
+        self._init_prompt("{system}\n{instruction}\n{tools}\n{input}### Response:\n",
+                          instruction_template,
+                          "### Response:")
 
     def _check_values(self, instruction, input, history, tools):
         assert not history, f"Chat history is not supported in {__class__}."
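A sketch of the new dict form (prompt text invented): the "system" and "user" parts are spliced into one instruction with the internal markers, then pulled apart again at prompt time by `_split_instruction` (see builtinPrompt.py below).

```python
from lazyllm import AlpacaPrompter

p = AlpacaPrompter({"system": "Expand the outline into prose.",
                    "user": '{"title": {title}, "describe": {describe}}'})
```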
25 changes: 22 additions & 3 deletions lazyllm/components/prompter/builtinPrompt.py

@@ -1,6 +1,7 @@
 from typing import Dict, Union, Any, List, Callable, Optional
 from ...common import LazyLLMRegisterMetaClass
 from lazyllm import LOG
+from functools import reduce
 import json
 import re
 
@@ -11,6 +12,8 @@ def __init__(self, show=False, tools=None):
         self._show = show
         self._tools = tools
         self._pre_hook = None
+        self._isa = "<!lazyllm-spliter!>"
+        self._ise = "</!lazyllm-spliter!>"
 
     def _init_prompt(self, template: str, instruction_template: str, split: Union[None, str] = None):
         self._template = template
@@ -74,9 +77,11 @@ def _get_instruction_and_input(self, input):
             return self._instruction_template, input
         assert isinstance(input, dict)
         kwargs = {k: input.pop(k) for k in prompt_keys}
-        assert len(input) <= 1, f'Unexpected keys found in input: {list(input.keys())}'
-        return (self._instruction_template.format(**kwargs) if len(kwargs) > 0 else self._instruction_template,
-                list(input.values())[0] if input else '')
+        self._instruction_template = reduce(lambda s, kv: s.replace(f"{{{kv[0]}}}", kv[1]),
+                                            kwargs.items(),
+                                            self._instruction_template)\
+            if len(kwargs) > 0 else self._instruction_template
+        return (self._instruction_template, list(input.values())[0] if input else "")
 
     def _check_values(self, instruction, input, history, tools): pass
 
@@ -105,6 +110,18 @@ def pre_hook(self, func: Optional[Callable] = None):
         self._pre_hook = func
         return self
 
+    def _split_instruction(self, instruction: str):
+        system_instruction = instruction
+        user_instruction = ""
+        if self._isa in instruction and self._ise in instruction:
+            # The instruction includes system prompts and/or user prompts
+            pattern = re.compile(r"%s(.*)%s" % (self._isa, self._ise))
+            ret = re.split(pattern, instruction)
+            system_instruction = ret[0]
+            user_instruction = ret[1]
+
+        return system_instruction, user_instruction
+
     def generate_prompt(self, input: Union[str, Dict[str, str], None] = None,
                         history: List[Union[List[str], Dict[str, Any]]] = None,
                         tools: Union[List[Dict[str, Any]], None] = None,
@@ -116,6 +133,8 @@ def generate_prompt(self, input: Union[str, Dict[str, str], None] = None,
         history = self._get_histories(history, return_dict=return_dict)
         tools = self._get_tools(tools, return_dict=return_dict)
         self._check_values(instruction, input, history, tools)
+        instruction, user_instruction = self._split_instruction(instruction)
+        input = user_instruction + input
         func = self._generate_prompt_dict_impl if return_dict else self._generate_prompt_impl
         result = func(instruction, input, history, tools, label)
         if self._show or show: LOG.info(result)
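A round-trip sketch of the splice/split mechanism (marker strings copied from `__init__` above):

```python
import re

isa, ise = "<!lazyllm-spliter!>", "</!lazyllm-spliter!>"
# What AlpacaPrompter/ChatPrompter build from a dict instruction:
spliced = "You are a writer." + isa + '{"title": {title}}' + ise

# What _split_instruction recovers at generate_prompt time:
ret = re.split(re.compile(r"%s(.*)%s" % (isa, ise)), spliced)
system_instruction, user_instruction = ret[0], ret[1]
print(system_instruction)  # -> You are a writer.
print(user_instruction)    # -> {"title": {title}}  (prepended to the user input)
```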
7 changes: 5 additions & 2 deletions lazyllm/components/prompter/chatPrompter.py

@@ -1,10 +1,13 @@
-from typing import List, Union, Optional
+from typing import List, Union, Optional, Dict
 from .builtinPrompt import LazyLLMPrompterBase
 
 class ChatPrompter(LazyLLMPrompterBase):
-    def __init__(self, instruction: Union[None, str] = None,
+    def __init__(self, instruction: Union[None, str, Dict[str, str]] = None,
                  extro_keys: Union[None, List[str]] = None, show: bool = False, tools: Optional[List] = None):
         super(__class__, self).__init__(show, tools=tools)
+        if isinstance(instruction, dict):
+            splice_instruction = instruction.get("system", "") + self._isa + instruction.get("user", "") + self._ise
+            instruction = splice_instruction
         instruction_template = f'{instruction}\n{{extro_keys}}\n'.replace(
             '{extro_keys}', LazyLLMPrompterBase._get_extro_key_template(extro_keys))
         self._init_prompt("{sos}{system}{instruction}{tools}{eos}\n\n{history}\n{soh}\n{input}\n{eoh}{soa}\n",
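This is the path the READMEs' `writer_prompt` exercises; a compact sketch of the same dict form on the chat side:

```python
from lazyllm import ChatPrompter

p = ChatPrompter({"system": "Expand the outline into prose.",
                  "user": '{"title": {title}, "describe": {describe}}'})
```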