Skip to content

Commit

Permalink
Add test for CI (#3114)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

Add test for CI

### Type of change

- [x] New Feature (non-breaking change which adds functionality)

Co-authored-by: liuhua <[email protected]>
  • Loading branch information
Feiue and liuhua authored Oct 31, 2024
1 parent 5590a82 commit 9aeb07d
Show file tree
Hide file tree
Showing 13 changed files with 286 additions and 172 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,16 @@ jobs:
echo "RAGFLOW_IMAGE=infiniflow/ragflow:dev" >> docker/.env
sudo docker compose -f docker/docker-compose.yml up -d
- name: Run tests
run: |
export http_proxy=""; export https_proxy=""; export no_proxy=""; export HTTP_PROXY=""; export HTTPS_PROXY=""; export NO_PROXY=""
export HOST_ADDRESS=http://host.docker.internal:9380
until sudo docker exec ragflow-server curl -s --connect-timeout 5 ${HOST_ADDRESS} > /dev/null; do
echo "Waiting for service to be available..."
sleep 5
done
cd sdk/python && poetry install && source .venv/bin/activate && cd test && pytest t_dataset.py t_chat.py t_session.py
- name: Stop ragflow:dev
if: always() # always run this step even if previous steps failed
run: |
Expand Down
8 changes: 4 additions & 4 deletions api/apps/sdk/chat.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ def create(tenant_id):
if kb.chunk_num == 0:
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
kbs = KnowledgebaseService.get_by_ids(ids)
embd_count = list(set(kb.embd_id for kb in kbs))
if embd_count != 1:
embd_count = list(set([kb.embd_id for kb in kbs]))
if len(embd_count) != 1:
return get_result(retmsg='Datasets use different embedding models."',retcode=RetCode.AUTHENTICATION_ERROR)
req["kb_ids"] = ids
# llm
Expand Down Expand Up @@ -167,8 +167,8 @@ def update(tenant_id,chat_id):
if kb.chunk_num == 0:
return get_error_data_result(f"The dataset {kb_id} doesn't own parsed file")
kbs = KnowledgebaseService.get_by_ids(ids)
embd_count=list(set(kb.embd_id for kb in kbs))
if embd_count != 1 :
embd_count=list(set([kb.embd_id for kb in kbs]))
if len(embd_count) != 1 :
return get_result(
retmsg='Datasets use different embedding models."',
retcode=RetCode.AUTHENTICATION_ERROR)
Expand Down
2 changes: 1 addition & 1 deletion api/apps/sdk/dataset.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def delete(tenant_id):
if not KnowledgebaseService.delete_by_id(id):
return get_error_data_result(
retmsg="Delete dataset error.(Database error)")
return get_result(retcode=RetCode.SUCCESS)
return get_result(retcode=RetCode.SUCCESS)

@manager.route('/datasets/<dataset_id>', methods=['PUT'])
@token_required
Expand Down
4 changes: 2 additions & 2 deletions api/apps/sdk/doc.py
Original file line number Diff line number Diff line change
Expand Up @@ -509,9 +509,9 @@ def rm_chunk(tenant_id,dataset_id,document_id):
if chunk_id not in sres.ids:
return get_error_data_result(f"Chunk {chunk_id} not found")
if not ELASTICSEARCH.deleteByQuery(
Q("ids", values=req["chunk_ids"]), search.index_name(tenant_id)):
Q("ids", values=chunk_list), search.index_name(tenant_id)):
return get_error_data_result(retmsg="Index updating failure")
deleted_chunk_ids = req["chunk_ids"]
deleted_chunk_ids = chunk_list
chunk_number = len(deleted_chunk_ids)
DocumentService.decrement_chunk_num(doc.id, doc.kb_id, 1, chunk_number, 0)
return get_result()
Expand Down
6 changes: 4 additions & 2 deletions api/utils/api_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@ def valid(permission,valid_permission,language,valid_language,chunk_method,valid

def valid_parameter(parameter,valid_values):
if parameter and parameter not in valid_values:
return get_error_data_result(f"`{parameter}` is not in {valid_values}")
return get_error_data_result(f"'{parameter}' is not in {valid_values}")

def get_parser_config(chunk_method,parser_config):
if parser_config:
Expand All @@ -354,6 +354,8 @@ def get_parser_config(chunk_method,parser_config):
"laws":{"raptor":{"use_raptor":False}},
"presentation":{"raptor":{"use_raptor":False}},
"one":None,
"knowledge_graph":{"chunk_token_num":8192,"delimiter":"\\n!?;。;!?","entity_types":["organization","person","location","event","time"]}}
"knowledge_graph":{"chunk_token_num":8192,"delimiter":"\\n!?;。;!?","entity_types":["organization","person","location","event","time"]},
"email":None,
"picture":None}
parser_config=key_mapping[chunk_method]
return parser_config
4 changes: 2 additions & 2 deletions sdk/python/hello_ragflow.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
import ragflow
import ragflow_sdk

print(ragflow.__version__)
print(ragflow_sdk.__version__)
97 changes: 96 additions & 1 deletion sdk/python/poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sdk/python/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ package-mode = true
[tool.poetry.dependencies]
python = "^3.10"
requests = "^2.30.0"
pytest = "^8.0.0"


[build-system]
Expand Down
8 changes: 4 additions & 4 deletions sdk/python/test/conftest.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
import pytest
import requests
import string
import random
import os
import pytest
import requests



HOST_ADDRESS = 'http://127.0.0.1:9380'
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')

def generate_random_email():
return 'user_' + ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))+'@1.com'
Expand Down
14 changes: 8 additions & 6 deletions sdk/python/test/t_chat.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
import os
from ragflow_sdk import RAGFlow
HOST_ADDRESS = 'http://127.0.0.1:9380'

HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')

def test_create_chat_with_name(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
kb = rag.create_dataset(name="test_create_chat")
displayed_name = "ragflow.txt"
with open("./ragflow.txt","rb") as file:
with open("ragflow.txt", "rb") as file:
blob = file.read()
document = {"displayed_name":displayed_name,"blob":blob}
documents = []
Expand All @@ -22,7 +24,7 @@ def test_update_chat_with_name(get_api_key_fixture):
rag = RAGFlow(API_KEY, HOST_ADDRESS)
kb = rag.create_dataset(name="test_update_chat")
displayed_name = "ragflow.txt"
with open("./ragflow.txt", "rb") as file:
with open("ragflow.txt", "rb") as file:
blob = file.read()
document = {"displayed_name": displayed_name, "blob": blob}
documents = []
Expand All @@ -39,7 +41,7 @@ def test_delete_chats_with_success(get_api_key_fixture):
rag = RAGFlow(API_KEY, HOST_ADDRESS)
kb = rag.create_dataset(name="test_delete_chat")
displayed_name = "ragflow.txt"
with open("./ragflow.txt", "rb") as file:
with open("ragflow.txt", "rb") as file:
blob = file.read()
document = {"displayed_name": displayed_name, "blob": blob}
documents = []
Expand All @@ -53,9 +55,9 @@ def test_delete_chats_with_success(get_api_key_fixture):
def test_list_chats_with_success(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
kb = rag.create_dataset(name="test_delete_chat")
kb = rag.create_dataset(name="test_list_chats")
displayed_name = "ragflow.txt"
with open("./ragflow.txt", "rb") as file:
with open("ragflow.txt", "rb") as file:
blob = file.read()
document = {"displayed_name": displayed_name, "blob": blob}
documents = []
Expand Down
109 changes: 55 additions & 54 deletions sdk/python/test/t_dataset.py
Original file line number Diff line number Diff line change
@@ -1,54 +1,55 @@
from ragflow_sdk import RAGFlow
import random
import pytest

HOST_ADDRESS = 'http://127.0.0.1:9380'

def test_create_dataset_with_name(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
rag.create_dataset("test_create_dataset_with_name")

def test_create_dataset_with_duplicated_name(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
with pytest.raises(Exception) as exc_info:
rag.create_dataset("test_create_dataset_with_name")
assert str(exc_info.value) == "Duplicated dataset name in creating dataset."

def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email"]
random_chunk_method = random.choice(valid_chunk_methods)
rag.create_dataset("test_create_dataset_with_random_chunk_method",chunk_method=random_chunk_method)

def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
valid_chunk_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
"knowledge_graph", "email"]
chunk_method = "invalid_chunk_method"
with pytest.raises(Exception) as exc_info:
rag.create_dataset("test_create_dataset_with_name",chunk_method=chunk_method)
assert str(exc_info.value) == f"{chunk_method} is not in {valid_chunk_methods}"


def test_update_dataset_with_name(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
ds = rag.create_dataset("test_update_dataset")
ds.update({"name": "updated_dataset"})


def test_delete_datasets_with_success(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
ds = rag.create_dataset("MA")
rag.delete_datasets(ids=[ds.id])


def test_list_datasets_with_success(get_api_key_fixture):
API_KEY = get_api_key_fixture
rag = RAGFlow(API_KEY, HOST_ADDRESS)
rag.list_datasets()
import os
import random
import pytest
from ragflow_sdk import RAGFlow

# Base URL of the RAGFlow server under test. CI exports HOST_ADDRESS
# (see the workflow's "Run tests" step); local runs fall back to localhost.
HOST_ADDRESS = os.getenv('HOST_ADDRESS', 'http://127.0.0.1:9380')

def test_create_dataset_with_name(get_api_key_fixture):
    """Creating a dataset with a plain, unused name should succeed."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    client.create_dataset("test_create_dataset_with_name")

def test_create_dataset_with_duplicated_name(get_api_key_fixture):
    """Creating a second dataset with an already-used name must fail.

    The dataset is created inside this test so the duplicate check no
    longer depends on test_create_dataset_with_name having run first
    (the original version reused that test's dataset name and therefore
    broke when run in isolation or in a different order).
    """
    API_KEY = get_api_key_fixture
    rag = RAGFlow(API_KEY, HOST_ADDRESS)
    # First creation succeeds; the second, identical name must be rejected.
    rag.create_dataset("test_create_dataset_with_duplicated_name")
    with pytest.raises(Exception) as exc_info:
        rag.create_dataset("test_create_dataset_with_duplicated_name")
    assert str(exc_info.value) == "Duplicated dataset name in creating dataset."

def test_create_dataset_with_random_chunk_method(get_api_key_fixture):
    """Any one of the documented chunk methods should be accepted."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    supported_methods = ["naive","manual","qa","table","paper","book","laws","presentation","picture","one","knowledge_graph","email"]
    chosen = random.choice(supported_methods)
    client.create_dataset("test_create_dataset_with_random_chunk_method", chunk_method=chosen)

def test_create_dataset_with_invalid_parameter(get_api_key_fixture):
    """An unsupported chunk_method must be rejected with the exact error text."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    supported_methods = ["naive", "manual", "qa", "table", "paper", "book", "laws", "presentation", "picture", "one",
                         "knowledge_graph", "email"]
    bad_method = "invalid_chunk_method"
    with pytest.raises(Exception) as exc_info:
        client.create_dataset("test_create_dataset_with_name", chunk_method=bad_method)
    # Server formats the message via valid_parameter() in api/utils/api_utils.py.
    assert str(exc_info.value) == f"'{bad_method}' is not in {supported_methods}"


def test_update_dataset_with_name(get_api_key_fixture):
    """Renaming a freshly created dataset via update() should succeed."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset("test_update_dataset")
    dataset.update({"name": "updated_dataset"})


def test_delete_datasets_with_success(get_api_key_fixture):
    """Deleting a just-created dataset by its id should succeed."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    dataset = client.create_dataset("MA")
    client.delete_datasets(ids=[dataset.id])


def test_list_datasets_with_success(get_api_key_fixture):
    """Listing datasets should complete without raising."""
    client = RAGFlow(get_api_key_fixture, HOST_ADDRESS)
    client.list_datasets()
Loading

0 comments on commit 9aeb07d

Please sign in to comment.