Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Online chat formatter #8

Merged
merged 23 commits into from
Jun 20, 2024
Merged
Show file tree
Hide file tree
Changes from 13 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 9 additions & 6 deletions README.ENG.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,9 +88,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()

```python
import lazyllm
from lazyllm import pipeline, parallel, Identity, warp, package
from lazyllm import pipeline, warp, package, bind
import time
import re, json
from lazyllm.components.formatter import JsonFormatter

toc_prompt="""
You are now an intelligent assistant. Your task is to understand the user's input and convert the outline into a list of nested dictionaries. Each dictionary contains a `title` and a `describe`, where the `title` should clearly indicate the level using Markdown format, and the `describe` is a description and writing guide for that section.
Expand Down Expand Up @@ -133,13 +134,15 @@ Receive as follows:
</details>

```python
t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
t1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
t2 = lazyllm.OnlineChatModule(source="openai", stream=False).prompt(completion_prompt)

spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2)
wzh1994 marked this conversation as resolved.
Show resolved Hide resolved
collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
with pipeline() as m:
m.m1 = t1
m.m2 = warp(writter)
m.m3 = bind(collector, m.m1, m.m2)

print(m({'query': 'Please help me write an article about the application of artificial intelligence in the medical field.'}))
```
Expand Down
15 changes: 9 additions & 6 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ mweb = lazyllm.WebModule(ppl, port=23456).start().wait()

```python
import lazyllm
from lazyllm import pipeline, parallel, Identity, warp, package
from lazyllm import pipeline, warp, package, bind
import time
import re, json
from lazyllm.components.formatter import JsonFormatter

toc_prompt=""" 你现在是一个智能助手。你的任务是理解用户的输入,将大纲转换为列表嵌套字典的形式。每个字典包含一个 `title` 和 `describe`,其中 `title` 中需要用Markdown格式标清层级,`describe` 是对该段的描述和写作指导。

Expand Down Expand Up @@ -128,13 +129,15 @@ completion_prompt="""
</details>

```python
t1 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=toc_prompt))
t2 = lazyllm.OnlineChatModule(source="openai", stream=False, prompter=ChatPrompter(instruction=completion_prompt))
t1 = lazyllm.OnlineChatModule(source="openai", stream=False).formatter(JsonFormatter()).prompt(toc_prompt)
t2 = lazyllm.OnlineChatModule(source="openai", stream=False).prompt(completion_prompt)

spliter = lambda s: tuple(eval(re.search(r'\[\s*\{.*\}\s*\]', s['message']['content'], re.DOTALL).group()))
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2, lambda d : d['message']['content'])
writter = pipeline(lambda d: json.dumps(d, ensure_ascii=False), t2)
collector = lambda dict_tuple, repl_tuple: "\n".join([v for d in [{**d, "describe": repl_tuple[i]} for i, d in enumerate(dict_tuple)] for v in d.values()])
m = pipeline(t1, spliter, parallel(Identity, warp(writter)), collector)
with pipeline() as m:
m.m1 = t1
m.m2 = warp(writter)
m.m3 = bind(collector, m.m1, m.m2)

print(m({'query':'请帮我写一篇关于人工智能在医疗领域应用的文章。'}))
```
Expand Down
15 changes: 15 additions & 0 deletions docs/source/api/components.rst
Original file line number Diff line number Diff line change
Expand Up @@ -60,3 +60,18 @@ ModelDownloader
.. autoclass:: lazyllm.components.ModelDownloader
:members:
:exclude-members:

Formatter
==========

.. autoclass:: lazyllm.components.formatter.LazyLLMFormatterBase
:members:
:exclude-members:

.. autoclass:: lazyllm.components.JsonFormatter
:members:
:exclude-members:

.. autoclass:: lazyllm.components.EmptyFormatter
:members:
:exclude-members:
4 changes: 3 additions & 1 deletion lazyllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,8 @@
Loop as loop, Switch as switch, IFS as ifs, Warp as warp)
from .components import (LazyLLMDataprocBase, LazyLLMFinetuneBase, LazyLLMDeployBase,
LazyLLMValidateBase, register as component_register, Prompter,
AlpacaPrompter, ChatPrompter, FastapiApp)
AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter)

from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
ServerModule, TrialModule, register as module_register,
OnlineChatModule, OnlineEmbeddingModule)
Expand All @@ -33,6 +34,7 @@
'AlpacaPrompter',
'ChatPrompter',
'FastapiApp',
'JsonFormatter',

# flow
'LazyLLMFlowsBase', # pipeline, parallel
Expand Down
6 changes: 3 additions & 3 deletions lazyllm/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,19 +334,19 @@ class LazyLlmRequest(struct):

def split(self, flag=None):
if flag is None:
assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
elif isinstance(flag, int):
assert len(self.kwargs) == 0 and isinstance(self.input, tuple), (
assert len(self.kwargs) == 0 and isinstance(self.input, (tuple, list)), (
f'Only tuple input can be split automatically, your input is {self.input} <{type(self.input)}>')
assert flag == len(self.input), 'input size mismatch with split number'
return [LazyLlmRequest(input=inp, global_parameters=self.global_parameters) for inp in self.input]
elif isinstance(flag, list):
if isinstance(self.input, dict):
assert len(self.kwargs) == 0, 'Cannot provived input and kwargs at the same time for split'
d = self.input
elif isinstance(self.input, tuple):
elif isinstance(self.input, (tuple, list)):
return self.split(len(flag))
else:
assert not self.input, 'Cannot provived input and kwargs at the same time for split'
Expand Down
6 changes: 5 additions & 1 deletion lazyllm/components/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from .validate import LazyLLMValidateBase
from .auto import AutoDeploy, AutoFinetune
from .utils import ModelDownloader
from .formatter import FormatterBase, EmptyFormatter, JsonFormatter

__all__ = [
'register',
Expand All @@ -19,5 +20,8 @@
'FastapiApp',
'AutoDeploy',
'AutoFinetune',
'ModelDownloader'
'ModelDownloader',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
10 changes: 10 additions & 0 deletions lazyllm/components/formatter/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from .formatterBase import LazyLLMFormatterBase, LazyLLMFormatterBase as FormatterBase, EmptyFormatter
from .jsonFormatter import JsonFormatter


__all__ = [
'LazyLLMFormatterBase',
'FormatterBase',
'EmptyFormatter',
'JsonFormatter'
]
50 changes: 50 additions & 0 deletions lazyllm/components/formatter/formatterBase.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from ...common import LazyLLMRegisterMetaClass

def is_number(s: str):
    """Return True if *s* parses as an int, False if it is empty or the
    literal "none" (case-insensitive), and raise ValueError otherwise.

    Used by the slice-string parser to decide whether a slice component
    (start/end/step) carries a numeric value or is absent.
    """
    try:
        int(s)
        return True
    except ValueError:
        # Bug fix: the original compared the *method object* `s.lower` to
        # "none" (always False), so "None"/"none" raised ValueError instead
        # of being treated as "no value". Call the method.
        if s.lower() == "none" or len(s) == 0:
            return False
        else:
            raise ValueError("Invalid number: " + s)

class LazyLLMFormatterBase(metaclass=LazyLLMRegisterMetaClass):
    """Base class for model-output formatters.

    An optional slice expression such as "[0:2, title]" can be supplied at
    construction time; it is parsed once into `self._slices`, a list whose
    elements are either `slice` objects (for ranged dimensions) or plain
    strings (for key/index lookups). Subclasses implement `_load` (string ->
    Python data) and `_parse_py_data_by_formatter` (apply the slices).
    """

    def __init__(self, formatter: str = None):
        self._formatter = formatter
        # No formatter string means no slicing at all.
        self._slices = None
        if self._formatter:
            self._parse_formatter()

    def _parse_formatter(self):
        # Drop the surrounding brackets, then handle each comma-separated
        # dimension independently.
        body = self._formatter.strip()[1:-1]
        parsed = []
        for spec in body.split(","):
            if ":" not in spec:
                # A bare token is used verbatim as a key/index.
                parsed.append(spec.strip())
                continue
            pieces = spec.split(":")
            lo = int(pieces[0]) if is_number(pieces[0]) else None
            hi = int(pieces[1]) if len(pieces) > 1 and is_number(pieces[1]) else None
            stride = int(pieces[2]) if len(pieces) > 2 and is_number(pieces[2]) else None
            parsed.append(slice(lo, hi, stride))
        self._slices = parsed

    def _load(self, msg: str):
        """Deserialize *msg* into Python data; subclasses must override."""
        raise NotImplementedError("This parse str function is not implemented.")

    def _parse_py_data_by_formatter(self, py_data):
        """Apply the parsed slices to *py_data*; subclasses must override."""
        raise NotImplementedError("This data parse function is not implemented.")

    def format(self, msg):
        """Format *msg*: strings are deserialized first, then sliced."""
        data = self._load(msg) if isinstance(msg, str) else msg
        return self._parse_py_data_by_formatter(data)

class EmptyFormatter(LazyLLMFormatterBase):
    """Pass-through formatter: hands the message back untouched."""

    def format(self, msg):
        # No deserialization, no slicing.
        return msg
57 changes: 57 additions & 0 deletions lazyllm/components/formatter/jsonFormatter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import json
from .formatterBase import LazyLLMFormatterBase as FormatterBase
import lazyllm

class JsonFormatter(FormatterBase):
    """Formatter that extracts JSON object(s) embedded in an LLM reply and
    optionally slices the resulting Python structure (slice syntax is parsed
    by the base class)."""

    def _extract_json_from_string(self, mixed_str: str):
        """Scan *mixed_str* and return the substrings that parse as JSON
        objects. Text outside top-level ``{...}`` spans is ignored; candidate
        spans that fail ``json.loads`` are discarded."""
        json_objects = []
        brace_level = 0
        current_json = ""
        in_string = False
        # Bug fix: track escaping with explicit state instead of peeking at
        # the last accumulated char — the old check misread `\\"` (escaped
        # backslash followed by a real closing quote) as an escaped quote.
        escaped = False

        for char in mixed_str:
            if in_string:
                if escaped:
                    escaped = False  # this char is escaped; cannot end the string
                elif char == '\\':
                    escaped = True
                elif char == '"':
                    in_string = False
            else:
                if char == '"':
                    in_string = True
                elif char == '{':
                    if brace_level == 0:
                        current_json = ""  # start a fresh candidate object
                    brace_level += 1
                elif char == '}':
                    brace_level -= 1

            # Accumulate while inside an object, plus the final closing '}'.
            if brace_level > 0 or (brace_level == 0 and char == '}'):
                current_json += char

            if brace_level == 0 and current_json:
                try:
                    json.loads(current_json)  # keep only well-formed objects
                    json_objects.append(current_json)
                    current_json = ""
                except json.JSONDecodeError:
                    continue

        return json_objects

    def _load(self, msg: str):
        """Extract JSON from *msg*.

        Returns the single parsed object when exactly one is found, a list
        when several are, and "" (after logging) when none can be parsed.
        """
        # Bug fix: the former `assert msg.count("{") == msg.count("}")`
        # pre-check rejected valid payloads whose *string values* contain
        # braces (e.g. '{"a": "}"}'); the extractor below already handles
        # unbalanced input safely, so the assert was removed.
        try:
            json_strs = self._extract_json_from_string(msg)
            if len(json_strs) == 0:
                raise TypeError(f"{msg} is not a valid json string.")
            res = [json.loads(json_str) for json_str in json_strs]
            return res if len(res) > 1 else res[0]
        except Exception as e:
            lazyllm.LOG.info(f"Error: {e}")
            return ""

    def _parse_py_data_by_formatter(self, data, *, slices=None):
        """Recursively apply the slice spec to *data*.

        A `slice` element selects a sub-range and recurses into each item;
        any other element is used directly as an index/key.
        """
        if slices is None: slices = self._slices
        if not slices: return data
        if isinstance(slices[0], slice):
            return [self._parse_py_data_by_formatter(d, slices=slices[1:]) for d in data[slices[0]]]
        return self._parse_py_data_by_formatter(data[slices[0]], slices=slices[1:])
Loading