Skip to content

Commit

Permalink
add test case for DocManager server and some minor changes (#355)
Browse files Browse the repository at this point in the history
  • Loading branch information
ouonline authored Nov 21, 2024
1 parent e311e8d commit 7dcba03
Show file tree
Hide file tree
Showing 4 changed files with 104 additions and 39 deletions.
2 changes: 1 addition & 1 deletion examples/rag_milvus_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

class TmpDir:
def __init__(self):
self.root_dir = os.path.expanduser(os.path.join(config['home'], 'rag_for_ut'))
self.root_dir = os.path.expanduser(os.path.join(config['home'], 'rag_for_example_ut'))
self.rag_dir = os.path.join(self.root_dir, 'rag_master')
os.makedirs(self.rag_dir, exist_ok=True)
self.store_file = os.path.join(self.root_dir, "milvus.db")
Expand Down
7 changes: 0 additions & 7 deletions lazyllm/tools/rag/map_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,10 +87,3 @@ def get_index(self, type: Optional[str] = None) -> Optional[IndexBase]:
if type is None:
type = 'default'
return self._name2index.get(type)

def find_node_by_uid(self, uid: str) -> Optional[DocNode]:
    """Return the first truthy entry stored under ``uid`` in any group, or ``None``.

    Every per-group container in ``self._group2docs`` is queried with
    ``.get(uid)`` in iteration order; falsy results are skipped.
    """
    candidates = (group_docs.get(uid) for group_docs in self._group2docs.values())
    return next((node for node in candidates if node), None)
99 changes: 68 additions & 31 deletions tests/basic_tests/test_document.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,23 @@
import lazyllm
from lazyllm.tools.rag.doc_impl import DocImpl
from lazyllm.tools.rag.utils import _FileNodeIndex
from lazyllm.tools.rag.transform import SentenceSplitter
from lazyllm.tools.rag.store_base import LAZY_ROOT_NAME
from lazyllm.tools.rag.doc_node import DocNode
from lazyllm.tools.rag.global_metadata import RAG_DOC_PATH
from lazyllm.tools.rag.global_metadata import RAG_DOC_PATH, RAG_DOC_ID
from lazyllm.tools.rag import Document, Retriever, TransformArgs, AdaptiveTransform
from lazyllm.tools.rag.doc_manager import DocManager
from lazyllm.tools.rag.utils import DocListManager
from lazyllm.launcher import cleanup
from lazyllm import config
from unittest.mock import MagicMock
import unittest
import httpx
import os
import shutil
import io
import re
import json
import time


class TestDocImpl(unittest.TestCase):
Expand Down Expand Up @@ -167,36 +176,64 @@ def test_doc_web_module(self):
assert response.status_code == 200
doc.stop()

class TestFileNodeIndex(unittest.TestCase):
class TmpDir:
    """Scratch directory tree used by the document tests.

    Construction creates ``<config['home']>/rag_for_document_ut/rag_master``
    (``~`` is expanded); finalization removes the whole
    ``rag_for_document_ut`` tree.

    Attributes:
        root_dir: Root of the scratch tree; deleted in ``__del__``.
        rag_dir: ``rag_master`` sub-directory inside ``root_dir``.
    """

    def __init__(self):
        self.root_dir = os.path.expanduser(os.path.join(config['home'], 'rag_for_document_ut'))
        self.rag_dir = os.path.join(self.root_dir, 'rag_master')
        os.makedirs(self.rag_dir, exist_ok=True)

    def __del__(self):
        # ``__del__`` may run on a half-built object (``__init__`` raised before
        # ``root_dir`` was set) or after the tree has already been removed.
        # Cleanup is best-effort, so it must never raise from the finalizer.
        root_dir = getattr(self, 'root_dir', None)
        if root_dir:
            shutil.rmtree(root_dir, ignore_errors=True)

class TestDocumentServer(unittest.TestCase):
def setUp(self):
self.index = _FileNodeIndex()
self.node1 = DocNode(uid='1', group=LAZY_ROOT_NAME, global_metadata={RAG_DOC_PATH: "d1"})
self.node2 = DocNode(uid='2', group=LAZY_ROOT_NAME, global_metadata={RAG_DOC_PATH: "d2"})
self.files = [self.node1.global_metadata[RAG_DOC_PATH], self.node2.global_metadata[RAG_DOC_PATH]]

def test_update(self):
self.index.update([self.node1, self.node2])

nodes = self.index.query(self.files)
assert len(nodes) == len(self.files)

ret = [node.global_metadata[RAG_DOC_PATH] for node in nodes]
assert set(ret) == set(self.files)

def test_remove(self):
self.index.update([self.node1, self.node2])
self.index.remove([self.node2.uid])
ret = self.index.query([self.node2.global_metadata[RAG_DOC_PATH]])
assert len(ret) == 0

def test_query(self):
self.index.update([self.node1, self.node2])
ret = self.index.query([self.node2.global_metadata[RAG_DOC_PATH]])
assert len(ret) == 1
assert ret[0] is self.node2
ret = self.index.query([self.node1.global_metadata[RAG_DOC_PATH]])
assert len(ret) == 1
assert ret[0] is self.node1
self.dir = TmpDir()
self.dlm = DocListManager(path=self.dir.rag_dir, name=None)
self.dlm.init_tables()

self.doc_impl = DocImpl(embed=MagicMock(), dlm=self.dlm)
self.doc_impl._lazy_init()

self.server = lazyllm.ServerModule(DocManager(self.dlm))
self.server.start()

url_pattern = r'(http://\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d+)'
self.doc_server_addr = re.findall(url_pattern, self.server._url)[0]

def test_delete_files_in_store(self):
    """Upload two files via the doc server, delete one, and check the store follows.

    Steps:
      1. POST two text files to ``/upload_files`` and expect two ids back.
      2. After a pause, find both documents in the root group of the store
         and check their ``RAG_DOC_ID`` values match the returned ids.
      3. POST the first id to ``/delete_files``.
      4. After another pause, expect only the second document to remain.
    """
    files = [('files', ('test1.txt', io.BytesIO(b"John's house is in Beijing"), 'text/plain')),
             ('files', ('test2.txt', io.BytesIO(b"John's house is in Shanghai"), 'text/plain'))]
    metadatas = [{"comment": "comment1"}, {"signature": "signature2"}]
    params = dict(override='true', metadatas=json.dumps(metadatas))

    url = f'{self.doc_server_addr}/upload_files'
    response = httpx.post(url, params=params, files=files, timeout=10)
    assert response.status_code == 200 and response.json().get('code') == 200, response.json()
    ids = response.json().get('data')[0]
    # NOTE(review): looks like leftover debug output logged at error level -- confirm it is still wanted.
    lazyllm.LOG.error(f'debug!!! ids -> {ids}')
    assert len(ids) == 2

    time.sleep(20)  # waiting for worker thread to update newly uploaded files

    # make sure that ids are written into the store
    # Start from None so a missing document fails the assertion below instead
    # of raising UnboundLocalError.
    test1_docid = test2_docid = None
    nodes = self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)
    for node in nodes:
        doc_path = node.global_metadata[RAG_DOC_PATH]
        if doc_path.endswith('test1.txt'):
            test1_docid = node.global_metadata[RAG_DOC_ID]
        elif doc_path.endswith('test2.txt'):
            test2_docid = node.global_metadata[RAG_DOC_ID]
    assert test1_docid and test2_docid
    assert {test1_docid, test2_docid} == set(ids)

    url = f'{self.doc_server_addr}/delete_files'
    response = httpx.post(url, json=dict(file_ids=[test1_docid]))
    assert response.status_code == 200 and response.json().get('code') == 200

    time.sleep(20)  # waiting for worker thread to delete files

    # Only the document that was not deleted should remain in the root group.
    nodes = self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)
    assert len(nodes) == 1
    assert nodes[0].global_metadata[RAG_DOC_ID] == test2_docid

if __name__ == "__main__":
unittest.main()
35 changes: 35 additions & 0 deletions tests/basic_tests/test_rag_utils.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
from lazyllm.tools.rag.utils import generic_process_filters
from lazyllm.tools.rag.doc_node import DocNode
from lazyllm.tools.rag.utils import _FileNodeIndex
from lazyllm.tools.rag.store_base import LAZY_ROOT_NAME
from lazyllm.tools.rag.global_metadata import RAG_DOC_PATH
import unittest

class TestRagUtils:
def test_generic_process_filters(self):
Expand All @@ -19,3 +23,34 @@ def test_generic_process_filters(self):

res = generic_process_filters(nodes, {'k2': 'v6'})
assert len(res) == 0

class TestFileNodeIndex(unittest.TestCase):
    """Exercise ``update``, ``remove`` and ``query`` of ``_FileNodeIndex`` with two nodes."""

    def setUp(self):
        # Two nodes in the root group, each tagged with a distinct document path.
        self.index = _FileNodeIndex()
        self.node1 = DocNode(uid='1', group=LAZY_ROOT_NAME, global_metadata={RAG_DOC_PATH: "d1"})
        self.node2 = DocNode(uid='2', group=LAZY_ROOT_NAME, global_metadata={RAG_DOC_PATH: "d2"})
        self.files = [n.global_metadata[RAG_DOC_PATH] for n in (self.node1, self.node2)]

    def test_update(self):
        """Querying by every known path returns one node per path."""
        self.index.update([self.node1, self.node2])

        found = self.index.query(self.files)
        assert len(found) == len(self.files)

        found_paths = {node.global_metadata[RAG_DOC_PATH] for node in found}
        assert found_paths == set(self.files)

    def test_remove(self):
        """A node removed by uid is no longer returned for its path."""
        self.index.update([self.node1, self.node2])
        self.index.remove([self.node2.uid])
        removed_path = self.node2.global_metadata[RAG_DOC_PATH]
        leftover = self.index.query([removed_path])
        assert len(leftover) == 0

    def test_query(self):
        """Querying a single path returns exactly the node registered for it."""
        self.index.update([self.node1, self.node2])
        # Same order as before: node2 first, then node1.
        for expected in (self.node2, self.node1):
            hits = self.index.query([expected.global_metadata[RAG_DOC_PATH]])
            assert len(hits) == 1
            assert hits[0] is expected

0 comments on commit 7dcba03

Please sign in to comment.