From 3be037bf8b2b1682cd9457c83318c42f53fc13f0 Mon Sep 17 00:00:00 2001 From: wangjian Date: Thu, 8 Aug 2024 20:45:41 +0800 Subject: [PATCH 01/10] add lazyllm before group --- README.CN.md | 4 ++-- README.md | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/README.CN.md b/README.CN.md index 8942563c7..6aca225dd 100644 --- a/README.CN.md +++ b/README.CN.md @@ -269,9 +269,9 @@ def test(input): def test_cmd(input): return f'echo input is {input}' -# >>> demo.test()(1) +# >>> lazyllm.demo.test()(1) # 'input is 1' -# >>> demo.test_cmd(launcher=launchers.slurm)(2) +# >>> lazyllm.demo.test_cmd(launcher=launchers.slurm)(2) # Command: srun -p pat_rd -N 1 --job-name=xf488db3 -n1 bash -c 'echo input is 2' ``` diff --git a/README.md b/README.md index 57dbd7232..345338778 100644 --- a/README.md +++ b/README.md @@ -276,9 +276,9 @@ def test(input): def test_cmd(input): return f'echo input is {input}' -# >>> demo.test()(1) +# >>> lazyllm.demo.test()(1) # 'input is 1' -# >>> demo.test_cmd(launcher=launchers.slurm)(2) +# >>> lazyllm.demo.test_cmd(launcher=launchers.slurm)(2) # Command: srun -p pat_rd -N 1 --job-name=xf488db3 -n1 bash -c 'echo input is 2' ``` From 4b3e0f525aef24abcbf525c683f2c0a7678c7664 Mon Sep 17 00:00:00 2001 From: wangjian Date: Wed, 18 Dec 2024 17:00:46 +0800 Subject: [PATCH 02/10] Add rag reader test format file --- tests/basic_tests/test_rag_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index a63c777d7..976c1e394 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -40,11 +40,11 @@ def test_reader_file(self): def test_reader_dir(self): input_dir = self.datasets reader = SimpleDirectoryReader(input_dir=input_dir, - exclude=["*.jpg", "*.mp3", "*.yml", "*.pdf", ".docx", "*.pptx"]) + exclude=["*.yml", "*.pdf", "*.docx", "*.mp4"]) docs = [] for doc in reader(): docs.append(doc) - assert len(docs) == 3 + assert len(docs) == 23 def test_register_local_reader(self): self.doc1.add_reader("**/*.yml", processYml) From 14b21d71f3d04c1cd07d5e794a73e315db7b3b59 Mon Sep 17 00:00:00 2001 From: wangjian Date: Wed, 18 Dec 2024 17:39:59 +0800 Subject: [PATCH 03/10] add openpyxl package for rag reader --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index 30955a06a..a36869444 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -4,3 +4,4 @@ olefile pytest-rerunfailures pytest-order pymilvus>=2.4.7, <2.5.0 +openpyxl From 14888e9d229e6d4ea502bbdd2a63d9cc45827c76 Mon Sep 17 00:00:00 2001 From: wangjian Date: Wed, 18 Dec 2024 18:37:37 +0800 Subject: [PATCH 04/10] add nbconver package for rag reader --- tests/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index a36869444..b67e3c2bb 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -5,3 +5,4 @@ pytest-rerunfailures pytest-order pymilvus>=2.4.7, <2.5.0 openpyxl +nbconvert From 1d488fe1e7b5efac5da480f3a8714238d9dcb36e Mon Sep 17 00:00:00 2001 From: wangjian Date: Wed, 18 Dec 2024 19:06:03 +0800 Subject: [PATCH 05/10] add some package for rag reader --- tests/requirements.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tests/requirements.txt b/tests/requirements.txt index b67e3c2bb..d7a411d23 100644 --- a/tests/requirements.txt +++ b/tests/requirements.txt @@ -6,3 +6,9 @@ pytest-order pymilvus>=2.4.7, <2.5.0 openpyxl nbconvert +python-pptx +EbookLib +html2text +pytesseract +git+https://github.com/openai/whisper.git +pydub From 27f503cebda51b41aaed0bb14766526ef005abb5 Mon Sep 17 00:00:00 2001 From: wangjian Date: Wed, 18 Dec 2024 22:54:53 +0800 Subject: [PATCH 06/10] exclude mp3 for rag reader test --- tests/basic_tests/test_rag_reader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index 976c1e394..dfc50ab93 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -40,7 +40,7 @@ def test_reader_file(self): def test_reader_dir(self): input_dir = self.datasets reader = SimpleDirectoryReader(input_dir=input_dir, - exclude=["*.yml", "*.pdf", "*.docx", "*.mp4"]) + exclude=["*.yml", "*.pdf", "*.docx", "*.mp4", "*.mp3"]) docs = [] for doc in reader(): docs.append(doc) From a0a27c26ec8e4b215f73c71cc0dd5ef1dd990393 Mon Sep 17 00:00:00 2001 From: wangjian Date: Thu, 19 Dec 2024 10:45:41 +0800 Subject: [PATCH 07/10] modify node number in rag reader tests --- tests/basic_tests/test_rag_reader.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index dfc50ab93..f8ea50c35 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -20,9 +20,9 @@ def processYml(file, extra_info=None): class TestRagReader(object): def setup_method(self): - self.doc1 = Document(dataset_path="ci_data/rag_reader", manager=False) - self.doc2 = Document(dataset_path="ci_data/rag_reader", manager=False) - self.datasets = os.path.join(lazyllm.config['data_path'], "ci_data/rag_reader/default/__data/sources") + self.doc1 = Document(dataset_path="rag_reader", manager=False) + self.doc2 = Document(dataset_path="rag_reader", manager=False) + self.datasets = os.path.join(lazyllm.config['data_path'], "rag_reader") def teardown_method(self): self.doc1._impl._local_file_reader = {} @@ -44,7 +44,7 @@ def test_reader_dir(self): docs = [] for doc in reader(): docs.append(doc) - assert len(docs) == 23 + assert len(docs) == 22 def test_register_local_reader(self): self.doc1.add_reader("**/*.yml", processYml) From c522babb0274904fc45aef6a4816d1cdd65b8c70 Mon Sep 17 00:00:00 2001 From: wangjian Date: Thu, 19 Dec 2024 14:02:06 +0800 Subject: [PATCH 08/10] add mp3 format files --- tests/basic_tests/test_rag_reader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index f8ea50c35..4da051d23 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -40,11 +40,11 @@ def test_reader_file(self): def test_reader_dir(self): input_dir = self.datasets reader = SimpleDirectoryReader(input_dir=input_dir, - exclude=["*.yml", "*.pdf", "*.docx", "*.mp4", "*.mp3"]) + exclude=["*.yml", "*.pdf", "*.docx", "*.mp4"]) docs = [] for doc in reader(): docs.append(doc) - assert len(docs) == 22 + assert len(docs) == 23 def test_register_local_reader(self): self.doc1.add_reader("**/*.yml", processYml) From 6de5c0f9ffcbeb4ff873dab05e45bae0589408d9 Mon Sep 17 00:00:00 2001 From: wangjian Date: Fri, 20 Dec 2024 00:36:48 +0800 Subject: [PATCH 09/10] modify mac and win config --- .github/workflows/macOS_test.yml | 1 + .github/workflows/win_test.yml | 5 ++++- tests/basic_tests/test_rag_reader.py | 4 ++-- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/.github/workflows/macOS_test.yml b/.github/workflows/macOS_test.yml index eb30d2c13..1575c7723 100644 --- a/.github/workflows/macOS_test.yml +++ b/.github/workflows/macOS_test.yml @@ -89,6 +89,7 @@ jobs: - name: basic_tests run : | git clone https://$GITHUB_TOKEN@github.com/LazyAGI/LazyLLM-Data.git /tmp/lazyllm/data + brew install ffmpeg pip install -r tests/requirements.txt export LAZYLLM_DATA_PATH=/tmp/lazyllm/data python -m pytest -v --reruns=2 tests/basic_tests/ diff --git a/.github/workflows/win_test.yml b/.github/workflows/win_test.yml index 809c13fcd..d35b4dda1 100644 --- a/.github/workflows/win_test.yml +++ b/.github/workflows/win_test.yml @@ -99,6 +99,9 @@ jobs: shell: bash run: | git clone https://$GITHUB_TOKEN@github.com/LazyAGI/LazyLLM-Data.git D:/a/LazyLLM/data + powershell -Command "Set-ExecutionPolicy RemoteSigned -Scope CurrentUser -Force; irm get.scoop.sh | iex" + export PATH="$HOME/scoop/shims:$PATH" + scoop install ffmpeg pip install -r tests/requirements.txt export LAZYLLM_DATA_PATH=D:/a/LazyLLM/data python -m pytest -m "not skip_on_win" -v --reruns=2 tests/basic_tests @@ -190,4 +193,4 @@ jobs: LAZYLLM_SENSENOVA_SECRET_KEY: ${{ secrets.LAZYLLM_SENSENOVA_SECRET_KEY }} LAZYLLM_PostgreSQL_URL: ${{ secrets.LAZYLLM_PostgreSQL_URL }} GITHUB_TOKEN: ${{ secrets.PERSONAL_GITHUB_TOKEN }} - timeout-minutes: 30 \ No newline at end of file + timeout-minutes: 30 diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index f8ea50c35..4da051d23 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -40,11 +40,11 @@ def test_reader_file(self): def test_reader_dir(self): input_dir = self.datasets reader = SimpleDirectoryReader(input_dir=input_dir, - exclude=["*.yml", "*.pdf", "*.docx", "*.mp4", "*.mp3"]) + exclude=["*.yml", "*.pdf", "*.docx", "*.mp4"]) docs = [] for doc in reader(): docs.append(doc) - assert len(docs) == 22 + assert len(docs) == 23 def test_register_local_reader(self): self.doc1.add_reader("**/*.yml", processYml) From 779423baa06dcb44a8aa811322c2545ddc11df0e Mon Sep 17 00:00:00 2001 From: wangjian Date: Fri, 20 Dec 2024 16:43:18 +0800 Subject: [PATCH 10/10] modify rag_reader path --- tests/basic_tests/test_rag_reader.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/basic_tests/test_rag_reader.py b/tests/basic_tests/test_rag_reader.py index 4da051d23..c760ec2a5 100644 --- a/tests/basic_tests/test_rag_reader.py +++ b/tests/basic_tests/test_rag_reader.py @@ -20,9 +20,9 @@ def processYml(file, extra_info=None): class TestRagReader(object): def setup_method(self): - self.doc1 = Document(dataset_path="rag_reader", manager=False) - self.doc2 = Document(dataset_path="rag_reader", manager=False) - self.datasets = os.path.join(lazyllm.config['data_path'], "rag_reader") + self.doc1 = Document(dataset_path="ci_data/rag_reader_full", manager=False) + self.doc2 = Document(dataset_path="ci_data/rag_reader_full", manager=False) + self.datasets = os.path.join(lazyllm.config['data_path'], "ci_data/rag_reader_full") def teardown_method(self): self.doc1._impl._local_file_reader = {}