diff --git a/lazyllm/tools/rag/dataReader.py b/lazyllm/tools/rag/dataReader.py index 335507b5..319525e0 100644 --- a/lazyllm/tools/rag/dataReader.py +++ b/lazyllm/tools/rag/dataReader.py @@ -220,7 +220,7 @@ def load_file(input_file: Path, file_metadata: Callable[[str], Dict], file_extra with fs.open(input_file, encoding=encoding) as f: data = f.read().decode(encoding) - doc = DocNode(content=data, metadata=metadata or {}) + doc = DocNode(text=data, metadata=metadata or {}) doc.docpath = str(input_file) if filename_as_id: doc.uid = str(input_file) documents.append(doc) diff --git a/lazyllm/tools/rag/doc_node.py b/lazyllm/tools/rag/doc_node.py index 6dc564cd..5e4cb2cc 100644 --- a/lazyllm/tools/rag/doc_node.py +++ b/lazyllm/tools/rag/doc_node.py @@ -19,9 +19,12 @@ class DocNode: def __init__(self, uid: Optional[str] = None, content: Optional[Union[str, List[Any]]] = None, group: Optional[str] = None, embedding: Optional[Dict[str, List[float]]] = None, parent: Optional["DocNode"] = None, metadata: Optional[Dict[str, Any]] = None, - global_metadata: Optional[Dict[str, Any]] = None): + global_metadata: Optional[Dict[str, Any]] = None, *, text: Optional[str] = None): + if text and content: + raise ValueError('`text` and `content` cannot be set at the same time.') + self.uid: str = uid if uid else str(uuid.uuid4()) - self.content: Optional[Union[str, List[Any]]] = content + self.content: Optional[Union[str, List[Any]]] = content if content else text self.group: Optional[str] = group self.embedding: Optional[Dict[str, List[float]]] = embedding or {} self._metadata: Dict[str, Any] = metadata or {} diff --git a/lazyllm/tools/rag/readers/docxReader.py b/lazyllm/tools/rag/readers/docxReader.py index daf71088..7e0ab3b6 100644 --- a/lazyllm/tools/rag/readers/docxReader.py +++ b/lazyllm/tools/rag/readers/docxReader.py @@ -22,4 +22,4 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, metadata = {"file_name": file.name} if extra_info is not None: 
metadata.update(extra_info) - return [DocNode(content=text, metadata=metadata)] + return [DocNode(text=text, metadata=metadata)] diff --git a/lazyllm/tools/rag/readers/epubReader.py b/lazyllm/tools/rag/readers/epubReader.py index 6f7dce3b..747c3402 100644 --- a/lazyllm/tools/rag/readers/epubReader.py +++ b/lazyllm/tools/rag/readers/epubReader.py @@ -30,4 +30,4 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, if item.get_type() == ebooklib.ITEM_DOCUMENT: text_list.append(html2text.html2text(item.get_content().decode("utf-8"))) text = "\n".join(text_list) - return [DocNode(content=text, metadata=extra_info or {})] + return [DocNode(text=text, metadata=extra_info or {})] diff --git a/lazyllm/tools/rag/readers/hwpReader.py b/lazyllm/tools/rag/readers/hwpReader.py index 4338e6eb..35f33b9c 100644 --- a/lazyllm/tools/rag/readers/hwpReader.py +++ b/lazyllm/tools/rag/readers/hwpReader.py @@ -36,14 +36,14 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, if self._is_valid(file_dir) is False: raise Exception("Not Valid HwpFile") result_text = self._get_text(load_file, file_dir) - return [DocNode(content=result_text, metadata=extra_info or {})] + return [DocNode(text=result_text, metadata=extra_info or {})] def _is_valid(self, dirs: List[str]) -> bool: if [self._FILE_HEADER_SECTION] not in dirs: return False return [self._HWP_SUMMARY_SECTION] in dirs def _text_to_docnode(self, text: str, extra_info: Optional[Dict] = None) -> DocNode: - return DocNode(content=text, metadata=extra_info or {}) + return DocNode(text=text, metadata=extra_info or {}) def _get_text(self, load_file: Any, file_dirs: List[str]) -> str: sections = self._get_body_sections(file_dirs) diff --git a/lazyllm/tools/rag/readers/imageReader.py b/lazyllm/tools/rag/readers/imageReader.py index fd35b0b8..021e5c10 100644 --- a/lazyllm/tools/rag/readers/imageReader.py +++ b/lazyllm/tools/rag/readers/imageReader.py @@ -99,4 +99,4 @@ def _load_data(self, file: Path, extra_info: 
Optional[Dict] = None, model = cast(pytesseract, self._parser_config['model']) text_str = model.image_to_string(image, **self._pytesseract_model_kwargs) - return [DocNode(content=text_str, metadata=extra_info or {})] + return [DocNode(text=text_str, metadata=extra_info or {})] diff --git a/lazyllm/tools/rag/readers/ipynbReader.py b/lazyllm/tools/rag/readers/ipynbReader.py index e87a8a0c..90e0cc5c 100644 --- a/lazyllm/tools/rag/readers/ipynbReader.py +++ b/lazyllm/tools/rag/readers/ipynbReader.py @@ -31,7 +31,7 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, splits = re.split(r"In\[\d+\]:", doc_str) splits.pop(0) - if self._concatenate: docs = [DocNode(content="\n\n".join(splits), metadata=extra_info or {})] - else: docs = [DocNode(content=s, metadata=extra_info or {}) for s in splits] + if self._concatenate: docs = [DocNode(text="\n\n".join(splits), metadata=extra_info or {})] + else: docs = [DocNode(text=s, metadata=extra_info or {}) for s in splits] return docs diff --git a/lazyllm/tools/rag/readers/markdownReader.py b/lazyllm/tools/rag/readers/markdownReader.py index 7771da98..0184576b 100644 --- a/lazyllm/tools/rag/readers/markdownReader.py +++ b/lazyllm/tools/rag/readers/markdownReader.py @@ -61,7 +61,7 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, if not isinstance(file, Path): file = Path(file) tups = self._parse_tups(file, fs=fs) - results = [DocNode(content=value if header is None else f"\n\n{header}\n{value}", metadata=extra_info or {}) + results = [DocNode(text=value if header is None else f"\n\n{header}\n{value}", metadata=extra_info or {}) for header, value in tups] return results diff --git a/lazyllm/tools/rag/readers/mboxreader.py b/lazyllm/tools/rag/readers/mboxreader.py index e27eb216..3fab832c 100644 --- a/lazyllm/tools/rag/readers/mboxreader.py +++ b/lazyllm/tools/rag/readers/mboxreader.py @@ -65,4 +65,4 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, i += 1 if self._max_count 
> 0 and i >= self._max_count: break - return [DocNode(content=result, metadata=extra_info or {}) for result in results] + return [DocNode(text=result, metadata=extra_info or {}) for result in results] diff --git a/lazyllm/tools/rag/readers/pandasReader.py b/lazyllm/tools/rag/readers/pandasReader.py index b7306a42..6082b6b4 100644 --- a/lazyllm/tools/rag/readers/pandasReader.py +++ b/lazyllm/tools/rag/readers/pandasReader.py @@ -28,8 +28,8 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, text_list = df.apply(lambda row: (self._col_joiner).join(row.astype(str).tolist()), axis=1).tolist() - if self._concat_rows: return [DocNode(content=(self._row_joiner).join(text_list), metadata=extra_info or {})] - else: return [DocNode(content=text, metadata=extra_info or {}) for text in text_list] + if self._concat_rows: return [DocNode(text=(self._row_joiner).join(text_list), metadata=extra_info or {})] + else: return [DocNode(text=text, metadata=extra_info or {}) for text in text_list] class PandasExcelReader(LazyLLMReaderBase): def __init__(self, concat_rows: bool = True, sheet_name: Optional[str] = None, @@ -58,14 +58,14 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, df = dfs.fillna("") text_list = (df.astype(str).apply(lambda row: " ".join(row.values), axis=1).tolist()) - if self._concat_rows: documents.append(DocNode(content="\n".join(text_list), metadata=extra_info or {})) - else: documents.extend([DocNode(content=text, metadata=extra_info or {}) for text in text_list]) + if self._concat_rows: documents.append(DocNode(text="\n".join(text_list), metadata=extra_info or {})) + else: documents.extend([DocNode(text=text, metadata=extra_info or {}) for text in text_list]) else: for df in dfs.values(): df = df.fillna("") text_list = (df.astype(str).apply(lambda row: " ".join(row), axis=1).tolist()) - if self._concat_rows: documents.append(DocNode(content="\n".join(text_list), metadata=extra_info or {})) - else: 
documents.extend([DocNode(content=text, metadata=extra_info or {}) for text in text_list]) + if self._concat_rows: documents.append(DocNode(text="\n".join(text_list), metadata=extra_info or {})) + else: documents.extend([DocNode(text=text, metadata=extra_info or {}) for text in text_list]) return documents diff --git a/lazyllm/tools/rag/readers/pdfReader.py b/lazyllm/tools/rag/readers/pdfReader.py index fc3a3636..a0a4043f 100644 --- a/lazyllm/tools/rag/readers/pdfReader.py +++ b/lazyllm/tools/rag/readers/pdfReader.py @@ -34,12 +34,12 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, metadata = {"file_name": file.name} if extra_info is not None: metadata.update(extra_info) text = "\n".join(pdf.pages[page].extract_text() for page in range(num_pages)) - docs.append(DocNode(content=text, metadata=metadata)) + docs.append(DocNode(text=text, metadata=metadata)) else: for page in range(num_pages): page_text = pdf.pages[page].extract_text() page_label = pdf.page_labels[page] metadata = {"page_label": page_label, "file_name": file.name} if extra_info is not None: metadata.update(extra_info) - docs.append(DocNode(content=page_text, metadata=metadata)) + docs.append(DocNode(text=page_text, metadata=metadata)) return docs diff --git a/lazyllm/tools/rag/readers/pptxReader.py b/lazyllm/tools/rag/readers/pptxReader.py index 92292768..3ae216e8 100644 --- a/lazyllm/tools/rag/readers/pptxReader.py +++ b/lazyllm/tools/rag/readers/pptxReader.py @@ -78,4 +78,4 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, os.unlink(f.name) if hasattr(shape, "text"): result += f"{shape.text}\n" - return [DocNode(content=result, metadata=extra_info or {})] + return [DocNode(text=result, metadata=extra_info or {})] diff --git a/lazyllm/tools/rag/readers/videoAudioReader.py b/lazyllm/tools/rag/readers/videoAudioReader.py index 25abf4ee..bdd41e1d 100644 --- a/lazyllm/tools/rag/readers/videoAudioReader.py +++ b/lazyllm/tools/rag/readers/videoAudioReader.py @@ 
-45,4 +45,4 @@ def _load_data(self, file: Path, extra_info: Optional[Dict] = None, result = model.transcribe(str(file)) transcript = result['text'] - return [DocNode(content=transcript, metadata=extra_info or {})] + return [DocNode(text=transcript, metadata=extra_info or {})] diff --git a/lazyllm/tools/rag/transform.py b/lazyllm/tools/rag/transform.py index 78dfd2a4..439e9615 100644 --- a/lazyllm/tools/rag/transform.py +++ b/lazyllm/tools/rag/transform.py @@ -46,7 +46,7 @@ def build_nodes_from_splits( if not text_chunk: continue node = DocNode( - content=text_chunk, + text=text_chunk, group=node_group, parent=doc, ) @@ -104,7 +104,7 @@ def __call__(self, node: DocNode, **kwargs: Any) -> List[DocNode]: # Parent and child should not be set here. results = self.transform(node, **kwargs) if isinstance(results, (DocNode, str)): results = [results] - return [DocNode(content=chunk) if isinstance(chunk, str) else chunk for chunk in results if chunk] + return [DocNode(text=chunk) if isinstance(chunk, str) else chunk for chunk in results if chunk] def make_transform(t): diff --git a/tests/advanced_tests/standard_test/test_reranker.py b/tests/advanced_tests/standard_test/test_reranker.py index 33c69c9b..b705e496 100644 --- a/tests/advanced_tests/standard_test/test_reranker.py +++ b/tests/advanced_tests/standard_test/test_reranker.py @@ -8,11 +8,11 @@ class TestReranker(unittest.TestCase): def setUp(self): - self.doc1 = DocNode(content="This is a test document with the keyword apple.") + self.doc1 = DocNode(text="This is a test document with the keyword apple.") self.doc2 = DocNode( - content="This is another test document with the keyword banana." + text="This is another test document with the keyword banana." 
) - self.doc3 = DocNode(content="This document contains the keyword cherry.") + self.doc3 = DocNode(text="This document contains the keyword cherry.") self.nodes = [self.doc1, self.doc2, self.doc3] self.query = "test query" @@ -63,7 +63,7 @@ def CustomReranker(node, **kwargs): return node return None - custom_doc = DocNode(content="This document contains custom keyword.") + custom_doc = DocNode(text="This document contains custom keyword.") nodes = [self.doc1, self.doc2, self.doc3, custom_doc] reranker = Reranker(name="CustomReranker") diff --git a/tests/basic_tests/test_bm25.py b/tests/basic_tests/test_bm25.py index c01d99fa..a59a01b9 100644 --- a/tests/basic_tests/test_bm25.py +++ b/tests/basic_tests/test_bm25.py @@ -7,9 +7,9 @@ class TestBM25(unittest.TestCase): def setUp(self): self.nodes = [ - DocNode(content="This is a test document."), - DocNode(content="This document is for testing BM25."), - DocNode(content="BM25 is a ranking function used in information retrieval."), + DocNode(text="This is a test document."), + DocNode(text="This document is for testing BM25."), + DocNode(text="BM25 is a ranking function used in information retrieval."), ] self.bm25_en = BM25(self.nodes, language="en", topk=2) @@ -36,16 +36,16 @@ def test_retrieve(self): class TestBM25Chinese(unittest.TestCase): def setUp(self): self.nodes = [ - DocNode(content="这是一个测试文档。这个文档用于测试BM25。"), + DocNode(text="这是一个测试文档。这个文档用于测试BM25。"), DocNode( - content="BM25是一种在信息检索中使用的排序函数。信息检索系统通过BM25算法来排序文档和分数。" + text="BM25是一种在信息检索中使用的排序函数。信息检索系统通过BM25算法来排序文档和分数。" ), - DocNode(content="中文文档的测试内容。测试文档中包含多个句子。"), + DocNode(text="中文文档的测试内容。测试文档中包含多个句子。"), DocNode( - content="这个测试是为了验证BM25在中文文档中的表现。我们需要对多个文档进行排序测试。" + text="这个测试是为了验证BM25在中文文档中的表现。我们需要对多个文档进行排序测试。" ), DocNode( - content="文档的内容可以影响BM25的评分。排序函数的性能对于信息检索非常重要。" + text="文档的内容可以影响BM25的评分。排序函数的性能对于信息检索非常重要。" ), ] diff --git a/tests/basic_tests/test_doc_node.py b/tests/basic_tests/test_doc_node.py index 4980fce4..e49aff18 100644 --- 
a/tests/basic_tests/test_doc_node.py +++ b/tests/basic_tests/test_doc_node.py @@ -9,7 +9,7 @@ def setup_method(self): self.metadata = {"author": "John Doe", "date": "2023-07-01"} self.embedding = [0.1, 0.2, 0.3] self.node = DocNode( - content=self.text, + text=self.text, embedding={"default": self.embedding}, ) self.node.metadata = self.metadata @@ -71,7 +71,7 @@ def test_get_metadata_str(self): def test_root_node(self): """Test the root_node property.""" - child_node = DocNode(content="Child node", parent=self.node) + child_node = DocNode(text="Child node", parent=self.node) assert child_node.root_node == self.node def test_metadata_property(self): diff --git a/tests/basic_tests/test_document.py b/tests/basic_tests/test_document.py index 97fd0a36..816aa403 100644 --- a/tests/basic_tests/test_document.py +++ b/tests/basic_tests/test_document.py @@ -25,7 +25,7 @@ class TestDocImpl(unittest.TestCase): def setUp(self): self.mock_embed = MagicMock() self.mock_directory_reader = MagicMock() - mock_node = DocNode(group=LAZY_ROOT_NAME, content="dummy text") + mock_node = DocNode(group=LAZY_ROOT_NAME, text="dummy text") mock_node._global_metadata = {RAG_DOC_PATH: "dummy_file.txt"} self.mock_directory_reader.load_data.return_value = [mock_node] @@ -72,7 +72,7 @@ def test_add_files(self): assert self.doc_impl.store is None self.doc_impl._lazy_init() assert len(self.doc_impl.store.get_nodes(LAZY_ROOT_NAME)) == 1 - new_doc = DocNode(content="new dummy text", group=LAZY_ROOT_NAME) + new_doc = DocNode(text="new dummy text", group=LAZY_ROOT_NAME) new_doc._global_metadata = {RAG_DOC_PATH: "new_file.txt"} self.mock_directory_reader.load_data.return_value = [new_doc] self.doc_impl._add_files(["new_file.txt"]) @@ -210,6 +210,7 @@ def test_delete_files_in_store(self): response = httpx.post(url, params=params, files=files, timeout=10) assert response.status_code == 200 and response.json().get('code') == 200, response.json() ids = response.json().get('data')[0] + 
lazyllm.LOG.error(f'debug!!! ids -> {ids}') assert len(ids) == 2 time.sleep(20) # waiting for worker thread to update newly uploaded files diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index 342083ab..f7a52822 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -7,14 +7,14 @@ class YmlReader(ReaderBase): def _load_data(self, file, extra_info=None, fs=None): with open(file, 'r') as f: data = f.read() - node = DocNode(content=data, metadata=extra_info or {}) + node = DocNode(text=data, metadata=extra_info or {}) node.content = "Call the class YmlReader." return [node] def processYml(file, extra_info=None): with open(file, 'r') as f: data = f.read() - node = DocNode(content=data, metadata=extra_info or {}) + node = DocNode(text=data, metadata=extra_info or {}) node.content = "Call the function processYml." return [node] diff --git a/tests/basic_tests/test_store.py b/tests/basic_tests/test_store.py index edcb0f01..711b280e 100644 --- a/tests/basic_tests/test_store.py +++ b/tests/basic_tests/test_store.py @@ -46,7 +46,7 @@ def setUp(self): embed_dims=self.embed_dims, dir=self.store_dir) self.store.update_nodes( - [DocNode(uid="1", content="text1", group=LAZY_ROOT_NAME, parent=None)], + [DocNode(uid="1", text="text1", group=LAZY_ROOT_NAME, parent=None)], ) def tearDown(self): @@ -56,8 +56,8 @@ def test_initialization(self): self.assertEqual(set(self.store._collections.keys()), set(self.node_groups)) def test_update_nodes(self): - node1 = DocNode(uid="1", content="text1", group="group1") - node2 = DocNode(uid="2", content="text2", group="group2") + node1 = DocNode(uid="1", text="text1", group="group1") + node2 = DocNode(uid="2", text="text2", group="group2") self.store.update_nodes([node1, node2]) collection = self.store._collections["group1"] self.assertEqual(set(collection.peek(collection.count())["ids"]), set(["1", "2"])) @@ -65,8 +65,8 @@ def test_update_nodes(self): 
self.assertEqual(nodes, [node1]) def test_remove_group_nodes(self): - node1 = DocNode(uid="1", content="text1", group="group1") - node2 = DocNode(uid="2", content="text2", group="group2") + node1 = DocNode(uid="1", text="text1", group="group1") + node2 = DocNode(uid="2", text="text2", group="group2") self.store.update_nodes([node1, node2]) collection = self.store._collections["group1"] self.assertEqual(collection.peek(collection.count())["ids"], ["1", "2"]) @@ -75,8 +75,8 @@ def test_remove_group_nodes(self): def test_load_store(self): # Set up initial data to be loaded - node1 = DocNode(uid="1", content="text1", group="group1", parent=None) - node2 = DocNode(uid="2", content="text2", group="group1", parent=node1) + node1 = DocNode(uid="1", text="text1", group="group1", parent=None) + node2 = DocNode(uid="2", text="text2", group="group1", parent=node1) self.store.update_nodes([node1, node2]) # Reset store and load from "persistent" storage @@ -90,8 +90,8 @@ def test_load_store(self): self.assertEqual(nodes[1].parent.uid, "1") def test_insert_dict_as_sparse_embedding(self): - node1 = DocNode(uid="1", content="text1", group="group1", embedding={'default': {1: 10, 2: 20}}) - node2 = DocNode(uid="2", content="text2", group="group1", embedding={'default': {0: 30, 2: 50}}) + node1 = DocNode(uid="1", text="text1", group="group1", embedding={'default': {1: 10, 2: 20}}) + node2 = DocNode(uid="2", text="text2", group="group1", embedding={'default': {0: 30, 2: 50}}) orig_embedding_dict = { node1.uid: [0, 10, 20], node2.uid: [30, 0, 50], @@ -112,16 +112,16 @@ def test_all_groups(self): self.assertEqual(set(self.store.all_groups()), set(self.node_groups)) def test_query(self): - node1 = DocNode(uid="1", content="text1", group="group1", parent=None) - node2 = DocNode(uid="2", content="text2", group="group1", parent=node1) + node1 = DocNode(uid="1", text="text1", group="group1", parent=None) + node2 = DocNode(uid="2", text="text2", group="group1", parent=node1) 
self.store.update_nodes([node1, node2]) res = self.store.query(query='text1', group_name='group1', embed_keys=['default'], topk=2, similarity_name='cosine', similarity_cut_off=0.000001) self.assertEqual(set([node1, node2]), set(res)) def test_group_others(self): - node1 = DocNode(uid="1", content="text1", group="group1", parent=None) - node2 = DocNode(uid="2", content="text2", group="group1", parent=node1) + node1 = DocNode(uid="1", text="text1", group="group1", parent=None) + node2 = DocNode(uid="2", text="text2", group="group1", parent=node1) self.store.update_nodes([node1, node2]) self.assertEqual(self.store.is_group_active("group1"), True) self.assertEqual(self.store.is_group_active("group2"), False) @@ -133,8 +133,8 @@ def setUp(self): } self.node_groups = [LAZY_ROOT_NAME, "group1", "group2"] self.store = MapStore(node_groups=self.node_groups, embed=self.mock_embed) - self.node1 = DocNode(uid="1", content="text1", group="group1", parent=None) - self.node2 = DocNode(uid="2", content="text2", group="group1", parent=self.node1) + self.node1 = DocNode(uid="1", text="text1", group="group1", parent=None) + self.node2 = DocNode(uid="2", text="text2", group="group1", parent=self.node1) def test_update_nodes(self): self.store.update_nodes([self.node1, self.node2]) @@ -211,14 +211,14 @@ def setUp(self): embed_dims=self.embed_dims, global_metadata_desc=self.global_metadata_desc, uri=self.store_file) - self.node1 = DocNode(uid="1", content="text1", group="group1", parent=None, + self.node1 = DocNode(uid="1", text="text1", group="group1", parent=None, embedding={"vec1": [8.0, 9.0, 10.0], "vec2": [11.0, 12.0, 13.0, 14.0, 15.0]}, metadata={'comment': 'comment1'}, global_metadata={'comment': 'comment3', 'signature': 'node1', 'tags': [1, 3, 5]}) - self.node2 = DocNode(uid="2", content="text2", group="group1", parent=self.node1, + self.node2 = DocNode(uid="2", text="text2", group="group1", parent=self.node1, embedding={"vec1": [100.0, 200.0, 300.0], "vec2": [400.0, 500.0, 
600.0, 700.0, 800.0]}, metadata={'comment': 'comment2', 'signature': 'node2'}) - self.node3 = DocNode(uid="3", content="text3", group="group1", parent=None, + self.node3 = DocNode(uid="3", text="text3", group="group1", parent=None, embedding={"vec1": [4.0, 5.0, 6.0], "vec2": [16.0, 17.0, 18.0, 19.0, 20.0]}, metadata={'comment': 'comment3', 'signature': 'node3'}, global_metadata={'tags': [1, 2, 3]}) @@ -282,8 +282,17 @@ def test_reload(self): self.store = MilvusStore(group_embed_keys=self.group_embed_keys, embed=self.mock_embed, embed_dims=self.embed_dims, global_metadata_desc=self.global_metadata_desc, uri=self.store_file) - self.assertEqual(set([node.uid for node in self.store.get_nodes('group1')]), - set([self.node1.uid, self.node2.uid, self.node3.uid])) + + nodes = self.store.get_nodes('group1') + orig_nodes = [self.node1, self.node2, self.node3] + self.assertEqual(set([node.uid for node in nodes]), set([node.uid for node in orig_nodes])) + + for node in nodes: + for orig_node in orig_nodes: + if node.uid == orig_node.uid: + self.assertEqual(node.text, orig_node.text) + break + # XXX `array_contains_any` is not supported in local(aka lite) mode. skip this ut def _test_query_with_array_filter(self): diff --git a/tests/basic_tests/test_transform.py b/tests/basic_tests/test_transform.py index 8815eb1e..47f60d60 100644 --- a/tests/basic_tests/test_transform.py +++ b/tests/basic_tests/test_transform.py @@ -10,7 +10,7 @@ def setup_method(self): def test_forward(self): text = """ Before college the two main things I worked on, outside of school, were writing and programming. I didn't write essays. I wrote what beginning writers were supposed to write then, and probably still are: short stories. My stories were awful. 
They had hardly any plot, just characters with strong feelings, which I imagined made them deep.""" # noqa: E501 - docs = [DocNode(content=text)] + docs = [DocNode(text=text)] result = self.splitter.batch_forward(docs, node_group='default') result_texts = [n.get_text() for n in result]