From bcd47f7eeb8fc8f5ab6196a715b857521821a0b3 Mon Sep 17 00:00:00 2001 From: cecilia-uu Date: Mon, 15 Jul 2024 10:41:09 +0800 Subject: [PATCH 1/2] API: show status of parsing --- api/apps/dataset_api.py | 20 ++++++++++++++ sdk/python/ragflow/ragflow.py | 4 +++ sdk/python/test/test_document.py | 45 ++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py index 67201c46eb1..99847f37594 100644 --- a/api/apps/dataset_api.py +++ b/api/apps/dataset_api.py @@ -750,7 +750,27 @@ def get_message_during_parsing_document(id, message): # ----------------------------stop parsing----------------------------------------------------- # ----------------------------show the status of the file----------------------------------------------------- +@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"]) +@login_required +def show_status(dataset_id, document_id): + try: + # valid dataset + exist, _ = KnowledgebaseService.get_by_id(dataset_id) + if not exist: + return construct_json_result(code=RetCode.DATA_ERROR, + message=f"This dataset: '{dataset_id}' cannot be found!") + # valid document + exist, _ = DocumentService.get_by_id(document_id) + if not exist: + return construct_json_result(code=RetCode.DATA_ERROR, + message=f"This document: '{document_id}' is not a valid document.") + + _, doc = DocumentService.get_by_id(document_id) # get doc object + doc_attributes = doc.to_dict() + return construct_json_result(data={"progress": doc_attributes["progress"], "status": doc_attributes["status"]}, code=RetCode.SUCCESS) + except Exception as e: + return construct_error_response(e) # ----------------------------list the chunks of the file----------------------------------------------------- # -- --------------------------delete the chunk----------------------------------------------------- diff --git a/sdk/python/ragflow/ragflow.py b/sdk/python/ragflow/ragflow.py index 9eccc4cb3d4..7b712b65ceb 100644 --- 
a/sdk/python/ragflow/ragflow.py +++ b/sdk/python/ragflow/ragflow.py @@ -159,7 +159,11 @@ def start_parsing_documents(self, dataset_id, doc_ids=None): # ----------------------------stop parsing----------------------------------------------------- # ----------------------------show the status of the file----------------------------------------------------- + def show_status(self, dataset_id, document_id): + endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status" + res = requests.get(endpoint, headers=self.authorization_header) + return res.json() # ----------------------------list the chunks of the file----------------------------------------------------- # ----------------------------delete the chunk----------------------------------------------------- diff --git a/sdk/python/test/test_document.py b/sdk/python/test/test_document.py index 38839d73a81..c176b718af7 100644 --- a/sdk/python/test/test_document.py +++ b/sdk/python/test/test_document.py @@ -953,7 +953,52 @@ def test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_ille # ----------------------------stop parsing----------------------------------------------------- # ----------------------------show the status of the file----------------------------------------------------- + def test_show_status_with_success(self): + # create a dataset + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + created_res = ragflow.create_dataset("test_show_status_with_success") + created_res_id = created_res["data"]["dataset_id"] + # upload files + file_paths = ["test_data/lol.txt"] + uploading_res = ragflow.upload_local_file(created_res_id, file_paths) + # get the doc_id + data = uploading_res["data"][0] + doc_id = data["id"] + # parse file + res = ragflow.start_parsing_document(created_res_id, doc_id) + assert res["code"] == RetCode.SUCCESS and res["message"] == "" + # show status + status_res = ragflow.show_status(created_res_id, doc_id) + assert status_res["code"] == RetCode.SUCCESS and 
status_res["data"]["status"] == "1" + + def test_show_status_nonexistent_document(self): + """ + Test showing the status of a document which does not exist. + """ + # create a dataset + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + created_res = ragflow.create_dataset("test_show_status_nonexistent_document") + created_res_id = created_res["data"]["dataset_id"] + res = ragflow.show_status(created_res_id, "imagination") + assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This document: 'imagination' is not a valid document." + def test_show_status_document_in_nonexistent_dataset(self): + """ + Test showing the status of a document whose dataset is nonexistent. + """ + # create a dataset + ragflow = RAGFlow(API_KEY, HOST_ADDRESS) + created_res = ragflow.create_dataset("test_show_status_document_in_nonexistent_dataset") + created_res_id = created_res["data"]["dataset_id"] + # upload files + file_paths = ["test_data/test.txt"] + uploading_res = ragflow.upload_local_file(created_res_id, file_paths) + # get the doc_id + data = uploading_res["data"][0] + doc_id = data["id"] + # parse + res = ragflow.show_status("imagination", doc_id) + assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset: 'imagination' cannot be found!" 
# ----------------------------list the chunks of the file----------------------------------------------------- # ----------------------------delete the chunk----------------------------------------------------- From 19313040e4135d44283599724292951525fe358d Mon Sep 17 00:00:00 2001 From: cecilia-uu Date: Mon, 15 Jul 2024 16:39:42 +0800 Subject: [PATCH 2/2] rename the method --- api/apps/dataset_api.py | 2 +- sdk/python/ragflow/ragflow.py | 2 +- sdk/python/test/test_document.py | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py index 99847f37594..8ac9a623cce 100644 --- a/api/apps/dataset_api.py +++ b/api/apps/dataset_api.py @@ -752,7 +752,7 @@ def get_message_during_parsing_document(id, message): # ----------------------------show the status of the file----------------------------------------------------- @manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"]) @login_required -def show_status(dataset_id, document_id): +def show_parsing_status(dataset_id, document_id): try: # valid dataset exist, _ = KnowledgebaseService.get_by_id(dataset_id) diff --git a/sdk/python/ragflow/ragflow.py b/sdk/python/ragflow/ragflow.py index 7b712b65ceb..3e8f94984f4 100644 --- a/sdk/python/ragflow/ragflow.py +++ b/sdk/python/ragflow/ragflow.py @@ -159,7 +159,7 @@ def start_parsing_documents(self, dataset_id, doc_ids=None): # ----------------------------stop parsing----------------------------------------------------- # ----------------------------show the status of the file----------------------------------------------------- - def show_status(self, dataset_id, document_id): + def show_parsing_status(self, dataset_id, document_id): endpoint = f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status" res = requests.get(endpoint, headers=self.authorization_header) diff --git a/sdk/python/test/test_document.py b/sdk/python/test/test_document.py index c176b718af7..a23c71e24a1 100644 --- 
a/sdk/python/test/test_document.py +++ b/sdk/python/test/test_document.py @@ -968,7 +968,7 @@ def test_show_status_with_success(self): res = ragflow.start_parsing_document(created_res_id, doc_id) assert res["code"] == RetCode.SUCCESS and res["message"] == "" # show status - status_res = ragflow.show_status(created_res_id, doc_id) + status_res = ragflow.show_parsing_status(created_res_id, doc_id) assert status_res["code"] == RetCode.SUCCESS and status_res["data"]["status"] == "1" def test_show_status_nonexistent_document(self): @@ -979,7 +979,7 @@ def test_show_status_nonexistent_document(self): ragflow = RAGFlow(API_KEY, HOST_ADDRESS) created_res = ragflow.create_dataset("test_show_status_nonexistent_document") created_res_id = created_res["data"]["dataset_id"] - res = ragflow.show_status(created_res_id, "imagination") + res = ragflow.show_parsing_status(created_res_id, "imagination") assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This document: 'imagination' is not a valid document." def test_show_status_document_in_nonexistent_dataset(self): @@ -997,7 +997,7 @@ def test_show_status_document_in_nonexistent_dataset(self): data = uploading_res["data"][0] doc_id = data["id"] # parse - res = ragflow.show_status("imagination", doc_id) + res = ragflow.show_parsing_status("imagination", doc_id) assert res["code"] == RetCode.DATA_ERROR and res["message"] == "This dataset: 'imagination' cannot be found!" # ----------------------------list the chunks of the file-----------------------------------------------------