Skip to content

Commit

Permalink
Merge branch 'main' into dev-docmanager
Browse files Browse the repository at this point in the history
  • Loading branch information
wzh1994 committed Nov 20, 2024
2 parents b21841f + c2bb6ff commit e6e4c48
Show file tree
Hide file tree
Showing 91 changed files with 2,839 additions and 1,053 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@ jobs:
export PYTHONPATH=$PWD:$PYTHONPATH
export LAZYLLM_DATA_PATH=/mnt/lustre/share_data/lazyllm/data/
export LAZYLLM_MODEL_PATH=/mnt/lustre/share_data/lazyllm/models
export LAZYLLM_HOME="${{ env.CI_PATH }}/${{ github.run_id }}-${{ github.job }}"
mkdir -p $LAZYLLM_HOME
python -m pytest --lf --last-failed-no-failures=all --durations=0 --reruns=2 -v tests/basic_tests/
AdvancedStandardTests:
Expand All @@ -85,6 +87,8 @@ jobs:
export PYTHONPATH=$PWD:$PYTHONPATH
export LAZYLLM_DATA_PATH=/mnt/lustre/share_data/lazyllm/data/
export LAZYLLM_MODEL_PATH=/mnt/lustre/share_data/lazyllm/models
export LAZYLLM_HOME="${{ env.CI_PATH }}/${{ github.run_id }}-${{ github.job }}"
mkdir -p $LAZYLLM_HOME
source ~/ENV/env.sh
python -m pytest --lf --last-failed-no-failures=all --durations=0 --reruns=2 -v tests/advanced_tests/standard_test/
Expand All @@ -101,6 +105,8 @@ jobs:
export PYTHONPATH=$PWD:$PYTHONPATH
export LAZYLLM_DATA_PATH=/mnt/lustre/share_data/lazyllm/data/
export LAZYLLM_MODEL_PATH=/mnt/lustre/share_data/lazyllm/models
export LAZYLLM_HOME="${{ env.CI_PATH }}/${{ github.run_id }}-${{ github.job }}"
mkdir -p $LAZYLLM_HOME
python -m pytest --lf --last-failed-no-failures=all --durations=0 --reruns=2 -v tests/advanced_tests/full_test/
ChargeTests:
Expand All @@ -114,5 +120,7 @@ jobs:
export PYTHONPATH=$PWD:$PYTHONPATH
export LAZYLLM_DATA_PATH=/mnt/lustre/share_data/lazyllm/data/
export LAZYLLM_MODEL_PATH=/mnt/lustre/share_data/lazyllm/models
export LAZYLLM_HOME="${{ env.CI_PATH }}/${{ github.run_id }}-${{ github.job }}"
mkdir -p $LAZYLLM_HOME
source ~/ENV/env.sh
python -m pytest --lf --last-failed-no-failures=all --durations=0 --reruns=2 -v tests/charge_tests
2 changes: 1 addition & 1 deletion LazyLLM-Env
Submodule LazyLLM-Env updated 1 files
+169 −1 poetry.lock
6 changes: 3 additions & 3 deletions docs/scripts/lazynote/editor/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ def __init__(self, gen_docstring: Callable[[Optional[str], str], str], pattern:
self.pattern = pattern
self.module = module
self.module_dict = self.create_module_dict(module)
self.current_class: Optional[str] = None
self.current_class: Optional[str] = ''

def create_module_dict(self, module: Any) -> Dict[str, Any]:
"""
Expand Down Expand Up @@ -89,7 +89,7 @@ def visit_ClassDef(self, node: cst.ClassDef) -> None:
Args:
node (cst.ClassDef): The ClassDef node.
"""
self.current_class = node.name.value
self.current_class = f'{self.current_class}.{node.name.value}'.lstrip('.')

def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef) -> cst.ClassDef:
"""
Expand All @@ -102,7 +102,7 @@ def leave_ClassDef(self, original_node: cst.ClassDef, updated_node: cst.ClassDef
Returns:
cst.ClassDef: The updated ClassDef node with a new docstring.
"""
self.current_class = None
self.current_class = self.current_class[:(lambda x: 0 if x < 0 else x)(self.current_class.rfind('.'))]
obj = self._get_obj_by_name(original_node.name.value)
docstring = obj.__doc__ if obj else None
return self._update_node_with_new_docstring(original_node, updated_node, docstring)
Expand Down
63 changes: 63 additions & 0 deletions examples/rag_map_store_with_milvus_index.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
# -*- coding: utf-8 -*-

import os
import lazyllm
from lazyllm import bind
import tempfile

def run(query):
_, store_file = tempfile.mkstemp(suffix=".db")

milvus_store_conf = {
'type': 'map',
'indices': {
'smart_embedding_index': {
'backend': 'milvus',
'kwargs': {
'uri': store_file,
'embedding_index_type': 'HNSW',
'embedding_metric_type': 'COSINE',
},
},
},
}

documents = lazyllm.Document(dataset_path="rag_master",
embed=lazyllm.TrainableModule("bge-large-zh-v1.5"),
manager=False,
store_conf=milvus_store_conf)

documents.create_node_group(name="sentences",
transform=lambda s: '。'.split(s))

prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task.'\
' In this task, you need to provide your answer based on the given context and question.'

with lazyllm.pipeline() as ppl:
ppl.retriever = lazyllm.Retriever(doc=documents, group_name="sentences", topk=3,
index='smart_embedding_index')

ppl.reranker = lazyllm.Reranker(name='ModuleReranker',
model="bge-reranker-large",
topk=1,
output_format='content',
join=True) | bind(query=ppl.input)

ppl.formatter = (
lambda nodes, query: dict(context_str=nodes, query=query)
) | bind(query=ppl.input)

ppl.llm = lazyllm.TrainableModule('internlm2-chat-7b').prompt(
lazyllm.ChatPrompter(instruction=prompt, extro_keys=['context_str']))

rag = lazyllm.ActionModule(ppl)
rag.start()
res = rag(query)

os.remove(store_file)

return res

if __name__ == '__main__':
res = run('何为天道?')
print(f'answer: {res}')
69 changes: 69 additions & 0 deletions examples/rag_milvus_store.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# -*- coding: utf-8 -*-

import os
import lazyllm
from lazyllm import bind, config
from lazyllm.tools.rag import DocField
import shutil

class TmpDir:
def __init__(self):
self.root_dir = os.path.expanduser(os.path.join(config['home'], 'rag_for_ut'))
self.rag_dir = os.path.join(self.root_dir, 'rag_master')
os.makedirs(self.rag_dir, exist_ok=True)
# creates a dummy file for rag
with open(os.path.join(self.rag_dir, '_dummy.txt'), "wb") as fd:
fd.write(b'dsfjfasfkjdsfewifjewofjefiejw')
self.store_file = os.path.join(self.root_dir, "milvus.db")

def __del__(self):
shutil.rmtree(self.root_dir)

tmp_dir = TmpDir()

milvus_store_conf = {
'type': 'milvus',
'kwargs': {
'uri': tmp_dir.store_file,
'embedding_index_type': 'HNSW',
'embedding_metric_type': 'COSINE',
},
}

doc_fields = {
'comment': DocField(data_type=DocField.DTYPE_VARCHAR, max_size=65535, default_value=' '),
'signature': DocField(data_type=DocField.DTYPE_VARCHAR, max_size=32, default_value=' '),
}

prompt = 'You will play the role of an AI Q&A assistant and complete a dialogue task.'\
' In this task, you need to provide your answer based on the given context and question.'

documents = lazyllm.Document(dataset_path=tmp_dir.rag_dir,
embed=lazyllm.TrainableModule("bge-large-zh-v1.5"),
manager=True,
store_conf=milvus_store_conf,
doc_fields=doc_fields)

documents.create_node_group(name="block", transform=lambda s: s.split("\n") if s else '')

with lazyllm.pipeline() as ppl:
ppl.retriever = lazyllm.Retriever(doc=documents, group_name="block", topk=3)

ppl.reranker = lazyllm.Reranker(name='ModuleReranker',
model="bge-reranker-large",
topk=1,
output_format='content',
join=True) | bind(query=ppl.input)

ppl.formatter = (
lambda nodes, query: dict(context_str=nodes, query=query)
) | bind(query=ppl.input)

ppl.llm = lazyllm.TrainableModule('internlm2-chat-7b').prompt(
lazyllm.ChatPrompter(instruction=prompt, extro_keys=['context_str']))

if __name__ == '__main__':
rag = lazyllm.ActionModule(ppl)
rag.start()
res = rag('何为天道?')
print(f'answer: {res}')
7 changes: 6 additions & 1 deletion lazyllm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,11 @@
LazyLLMValidateBase, register as component_register, Prompter,
AlpacaPrompter, ChatPrompter, FastapiApp, JsonFormatter, FileFormatter)

from .module import (ModuleBase, UrlModule, TrainableModule, ActionModule,
from .module import (ModuleBase, ModuleBase as Module, UrlModule, TrainableModule, ActionModule,
ServerModule, TrialModule, register as module_register,
OnlineChatModule, OnlineEmbeddingModule, AutoModel)
from .client import redis_client
from .hook import LazyLLMHook
from .tools import (Document, Reranker, Retriever, WebModule, ToolManager, FunctionCall,
FunctionCallAgent, fc_register, ReactAgent, PlanAndSolveAgent, ReWOOAgent, SentenceSplitter,
LLMParser)
Expand Down Expand Up @@ -47,6 +48,7 @@

# module
'ModuleBase',
'Module',
'UrlModule',
'TrainableModule',
'ActionModule',
Expand All @@ -61,6 +63,9 @@
# client
'redis_client',

# hook
'LazyLLMHook',

# tools
'Document',
'Retriever',
Expand Down
3 changes: 2 additions & 1 deletion lazyllm/common/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from .registry import LazyLLMRegisterMetaClass, _get_base_cls_from_registry, Register
from .common import package, kwargs, arguments, LazyLLMCMD, timeout, final, ReadOnlyWrapper, DynamicDescriptor
from .common import package, kwargs, arguments, LazyLLMCMD, timeout, final, ReadOnlyWrapper, DynamicDescriptor, override
from .common import FlatList, Identity, ResultCollector, ArgsDict, CaseInsensitiveDict
from .common import ReprRule, make_repr, modify_repr
from .common import once_flag, call_once, once_wrapper, singleton, reset_on_pickle
Expand Down Expand Up @@ -39,6 +39,7 @@
'package',
'kwargs',
'arguments',
'override',

# option
'Option',
Expand Down
4 changes: 2 additions & 2 deletions lazyllm/common/bind.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,8 +104,8 @@ def __init__(self, __bind_func=_None, *args, **kw):
self._f = __bind_func() if isinstance(__bind_func, type) and __bind_func is not Bind._None else __bind_func
self._args = args
self._kw = kw
self._has_root = any([isinstance(a, AttrTree) for a in args])
self._has_root = self._has_root or any([isinstance(v, AttrTree) for k, v in kw.items()])
self._has_root = (any([isinstance(a, AttrTree) for a in args])
or any([isinstance(v, AttrTree) for v in kw.values()]))

def __ror__(self, __value: Callable):
if self._f is not Bind._None: self._args = (self._f,) + self._args
Expand Down
6 changes: 6 additions & 0 deletions lazyllm/common/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@
_F = typing.TypeVar("_F", bound=Callable[..., Any])
def final(f: _F) -> _F: return f

try:
from typing import override
except ImportError:
def override(func: Callable):
return func


class FlatList(list):
def absorb(self, item):
Expand Down
12 changes: 9 additions & 3 deletions lazyllm/common/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,9 +121,9 @@ def __reduce__(self):


class Globals(object):
__global_attrs__ = ThreadSafeDict(chat_history={}, global_parameters={},
bind_args={}, tool_delimiter="<|tool_calls|>",
lazyllm_files={})
__global_attrs__ = ThreadSafeDict(
chat_history={}, global_parameters={}, bind_args={}, tool_delimiter="<|tool_calls|>", lazyllm_files={}, usage={}
)

def __init__(self):
self.__data = ThreadSafeDict()
Expand Down Expand Up @@ -176,6 +176,12 @@ def __getitem__(self, __key: str):
except KeyError:
raise KeyError(f'Cannot find key {__key}, current session-id is {self._sid}')

def get(self, __key: str, default: Any = None):
try:
return self[__key]
except KeyError:
return default

def __setattr__(self, __name: str, __value: Any):
if __name in __class__.__global_attrs__:
self[__name] = __value
Expand Down
26 changes: 19 additions & 7 deletions lazyllm/common/queue.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import os
from typing import Type
from lazyllm.thirdparty import redis
from filelock import FileLock

config.add("default_fsqueue", str, "sqlite", "DEFAULT_FSQUEUE")
config.add("fsqredis_url", str, "", "FSQREDIS_URL")
Expand Down Expand Up @@ -64,16 +65,27 @@ def _size(self, id): pass
@abstractmethod
def _clear(self, id): pass

# true means one connection can be used in multiple thread
# refer to: https://sqlite.org/compile.html#threadsafe
def sqlite3_check_threadsafety() -> bool:
conn = sqlite3.connect(":memory:")
res = conn.execute("""
select * from pragma_compile_options
where compile_options like 'THREADSAFE=%'
""").fetchall()
conn.close()
return True if res[0][0] == 'THREADSAFE=1' else False

class SQLiteQueue(FileSystemQueue):
def __init__(self, klass='__default__'):
super(__class__, self).__init__(klass=klass)
self.db_path = os.path.expanduser(os.path.join(config['home'], '.lazyllm_filesystem_queue.db'))
self._lock = threading.Lock()
self._lock = FileLock(self.db_path + '.lock')
self._check_same_thread = not sqlite3_check_threadsafety()
self._initialize_db()

def _initialize_db(self):
with sqlite3.connect(self.db_path) as conn:
with self._lock, sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
cursor.execute('''
CREATE TABLE IF NOT EXISTS queue (
Expand All @@ -87,7 +99,7 @@ def _initialize_db(self):

def _enqueue(self, id, message):
with self._lock:
with sqlite3.connect(self.db_path) as conn:
with sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT MAX(position) FROM queue WHERE id = ?
Expand All @@ -103,7 +115,7 @@ def _enqueue(self, id, message):
def _dequeue(self, id, limit=None):
"""Retrieve and remove all messages from the queue."""
with self._lock:
with sqlite3.connect(self.db_path) as conn:
with sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
if limit:
cursor.execute('SELECT message, position FROM queue WHERE id = ? '
Expand All @@ -123,7 +135,7 @@ def _dequeue(self, id, limit=None):

def _peek(self, id):
with self._lock:
with sqlite3.connect(self.db_path) as conn:
with sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT message FROM queue WHERE id = ? ORDER BY position ASC LIMIT 1
Expand All @@ -135,7 +147,7 @@ def _peek(self, id):

def _size(self, id):
with self._lock:
with sqlite3.connect(self.db_path) as conn:
with sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
cursor.execute('''
SELECT COUNT(*) FROM queue WHERE id = ?
Expand All @@ -144,7 +156,7 @@ def _size(self, id):

def _clear(self, id):
with self._lock:
with sqlite3.connect(self.db_path) as conn:
with sqlite3.connect(self.db_path, check_same_thread=self._check_same_thread) as conn:
cursor = conn.cursor()
cursor.execute('''
DELETE FROM queue WHERE id = ?
Expand Down
Loading

0 comments on commit e6e4c48

Please sign in to comment.