Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

API: show status of parsing #1504

Merged
merged 2 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions api/apps/dataset_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -750,7 +750,27 @@ def get_message_during_parsing_document(id, message):
# ----------------------------stop parsing-----------------------------------------------------

# ----------------------------show the status of the file-----------------------------------------------------
@manager.route("/<dataset_id>/documents/<document_id>/status", methods=["GET"])
@login_required
def show_parsing_status(dataset_id, document_id):
    """Report the parsing progress and status of a single document.

    Args:
        dataset_id: Identifier of the dataset, taken from the URL path.
        document_id: Identifier of the document, taken from the URL path.

    Returns:
        A JSON result. On success, ``data`` holds the document's
        ``progress`` and ``status`` fields with ``RetCode.SUCCESS``.
        If the dataset or the document cannot be found, the result carries
        ``RetCode.DATA_ERROR`` and an explanatory message. Any unexpected
        exception is converted with ``construct_error_response``.
    """
    try:
        # Validate the dataset first so that an unknown dataset is reported
        # before the document is looked up.
        dataset_found, _ = KnowledgebaseService.get_by_id(dataset_id)
        if not dataset_found:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This dataset: '{dataset_id}' cannot be found!")

        # A single lookup both validates the document and yields the object;
        # querying twice is wasteful and leaves a gap in which the document
        # could disappear between the existence check and its use.
        document_found, doc = DocumentService.get_by_id(document_id)
        if not document_found:
            return construct_json_result(code=RetCode.DATA_ERROR,
                                         message=f"This document: '{document_id}' is not a valid document.")

        # NOTE(review): nothing here checks that the document actually
        # belongs to `dataset_id` -- confirm whether that is required.
        doc_attributes = doc.to_dict()

        return construct_json_result(
            data={"progress": doc_attributes["progress"], "status": doc_attributes["status"]},
            code=RetCode.SUCCESS,
        )
    except Exception as e:
        return construct_error_response(e)
# ----------------------------list the chunks of the file-----------------------------------------------------

# ----------------------------delete the chunk-----------------------------------------------------
Expand Down
4 changes: 4 additions & 0 deletions sdk/python/ragflow/ragflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,11 @@ def start_parsing_documents(self, dataset_id, doc_ids=None):
# ----------------------------stop parsing-----------------------------------------------------

# ----------------------------show the status of the file-----------------------------------------------------
def show_parsing_status(self, dataset_id, document_id):
    """Ask the server for the parsing status of a document.

    Sends a GET request to the document's ``status`` URL under the given
    dataset, using the client's authorization header, and returns the
    decoded JSON body of the response.
    """
    return requests.get(
        f"{self.dataset_url}/{dataset_id}/documents/{document_id}/status",
        headers=self.authorization_header,
    ).json()
# ----------------------------list the chunks of the file-----------------------------------------------------

# ----------------------------delete the chunk-----------------------------------------------------
Expand Down
45 changes: 45 additions & 0 deletions sdk/python/test/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -953,7 +953,52 @@ def test_start_parsing_multiple_documents_in_the_dataset_whose_parser_id_is_ille
# ----------------------------stop parsing-----------------------------------------------------

# ----------------------------show the status of the file-----------------------------------------------------
def test_show_status_with_success(self):
    """
    Test showing the status of a document after parsing has been started.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)

    # Set up a fresh dataset to hold the document.
    dataset_id = client.create_dataset("test_show_status_with_success")["data"]["dataset_id"]

    # Upload one local file and pick up the id of the resulting document.
    upload_result = client.upload_local_file(dataset_id, ["test_data/lol.txt"])
    doc_id = upload_result["data"][0]["id"]

    # Kick off parsing; it must be accepted without an error message.
    parse_result = client.start_parsing_document(dataset_id, doc_id)
    assert parse_result["code"] == RetCode.SUCCESS
    assert parse_result["message"] == ""

    # Query the status endpoint and check the reported status.
    status_result = client.show_parsing_status(dataset_id, doc_id)
    assert status_result["code"] == RetCode.SUCCESS
    assert status_result["data"]["status"] == "1"

def test_show_status_nonexistent_document(self):
    """
    Test that asking for the status of an unknown document id yields a data error.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)

    # A real dataset is needed so that only the document lookup fails.
    dataset_id = client.create_dataset("test_show_status_nonexistent_document")["data"]["dataset_id"]

    status_result = client.show_parsing_status(dataset_id, "imagination")
    assert status_result["code"] == RetCode.DATA_ERROR
    assert status_result["message"] == "This document: 'imagination' is not a valid document."

def test_show_status_document_in_nonexistent_dataset(self):
    """
    Test that asking for a document's status under an unknown dataset id yields a data error.
    """
    client = RAGFlow(API_KEY, HOST_ADDRESS)

    # Create a dataset and upload a file so that a genuine document id exists.
    dataset_id = client.create_dataset("test_show_status_document_in_nonexistent_dataset")["data"]["dataset_id"]
    upload_result = client.upload_local_file(dataset_id, ["test_data/test.txt"])
    doc_id = upload_result["data"][0]["id"]

    # Request the status with a dataset id that does not exist.
    status_result = client.show_parsing_status("imagination", doc_id)
    assert status_result["code"] == RetCode.DATA_ERROR
    assert status_result["message"] == "This dataset: 'imagination' cannot be found!"
# ----------------------------list the chunks of the file-----------------------------------------------------

# ----------------------------delete the chunk-----------------------------------------------------
Expand Down