From 6edc9de9d8799badacfdaa4ada768230b7ac511f Mon Sep 17 00:00:00 2001 From: Fachuan Bai Date: Thu, 19 Sep 2024 19:19:27 +0800 Subject: [PATCH] Storage: Rename all the variables about get file to storage from minio. (#2497) https://github.com/infiniflow/ragflow/issues/2496 ### What problem does this PR solve? The helpers used to locate and fetch stored files were still named after MinIO, although the files are fetched through `STORAGE_IMPL`. This PR renames `File2DocumentService.get_minio_address` to `get_storage_address` and `get_minio_binary` (in `rag/svr/task_executor.py`) to `get_storage_binary`, and updates every call site accordingly. It is a pure rename with no behavior change. ### Type of change - [ ] Bug Fix (non-breaking change which fixes an issue) - [ ] New Feature (non-breaking change which adds functionality) - [ ] Documentation Update - [x] Refactoring - [ ] Performance Improvement - [ ] Other (please describe): --- api/apps/api_app.py | 4 ++-- api/apps/dataset_api.py | 6 +++--- api/apps/document_app.py | 6 +++--- api/apps/file_app.py | 2 +- api/apps/sdk/doc.py | 6 +++--- api/db/services/file2document_service.py | 2 +- rag/svr/task_executor.py | 6 +++--- 7 files changed, 16 insertions(+), 16 deletions(-) diff --git a/api/apps/api_app.py b/api/apps/api_app.py index 388f5dd3f9f..110a6b82686 100644 --- a/api/apps/api_app.py +++ b/api/apps/api_app.py @@ -478,7 +478,7 @@ def upload(): e, doc = DocumentService.get_by_id(doc["id"]) doc = doc.to_dict() doc["tenant_id"] = tenant_id - bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) + bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"]) queue_tasks(doc, bucket, name) except Exception as e: return server_error_response(e) @@ -640,7 +640,7 @@ def document_rm(): if not tenant_id: return get_data_error_result(retmsg="Tenant not found!") - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + b, n = File2DocumentService.get_storage_address(doc_id=doc_id) if not DocumentService.remove_document(doc, tenant_id): return get_data_error_result( diff --git a/api/apps/dataset_api.py b/api/apps/dataset_api.py index a7bbde8b103..d00c52bb803 100644 --- a/api/apps/dataset_api.py +++ b/api/apps/dataset_api.py @@ -420,7 
+420,7 @@ def delete_document(document_id, dataset_id): # string f" reason!", code=RetCode.AUTHENTICATION_ERROR) # get the doc's id and location - real_dataset_id, location = File2DocumentService.get_minio_address(doc_id=document_id) + real_dataset_id, location = File2DocumentService.get_storage_address(doc_id=document_id) if real_dataset_id != dataset_id: return construct_json_result(message=f"The document {document_id} is not in the dataset: {dataset_id}, " @@ -595,7 +595,7 @@ def download_document(dataset_id, document_id): code=RetCode.ARGUMENT_ERROR) # The process of downloading - doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id) # minio address + doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address file_stream = STORAGE_IMPL.get(doc_id, doc_location) if not file_stream: return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) @@ -736,7 +736,7 @@ def parsing_document_internal(id): doc_attributes = doc_attributes.to_dict() doc_id = doc_attributes["id"] - bucket, doc_name = File2DocumentService.get_minio_address(doc_id=doc_id) + bucket, doc_name = File2DocumentService.get_storage_address(doc_id=doc_id) binary = STORAGE_IMPL.get(bucket, doc_name) parser_name = doc_attributes["parser_id"] if binary: diff --git a/api/apps/document_app.py b/api/apps/document_app.py index 9b0834dd52c..b33bff4cdf7 100644 --- a/api/apps/document_app.py +++ b/api/apps/document_app.py @@ -297,7 +297,7 @@ def rm(): if not tenant_id: return get_data_error_result(retmsg="Tenant not found!") - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + b, n = File2DocumentService.get_storage_address(doc_id=doc_id) if not DocumentService.remove_document(doc, tenant_id): return get_data_error_result( @@ -342,7 +342,7 @@ def run(): e, doc = DocumentService.get_by_id(id) doc = doc.to_dict() doc["tenant_id"] = tenant_id - bucket, name = 
File2DocumentService.get_minio_address(doc_id=doc["id"]) + bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"]) queue_tasks(doc, bucket, name) return get_json_result(data=True) @@ -393,7 +393,7 @@ def get(doc_id): if not e: return get_data_error_result(retmsg="Document not found!") - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + b, n = File2DocumentService.get_storage_address(doc_id=doc_id) response = flask.make_response(STORAGE_IMPL.get(b, n)) ext = re.search(r"\.([^.]+)$", doc.name) diff --git a/api/apps/file_app.py b/api/apps/file_app.py index 8aec9b832ff..4f4b44af98f 100644 --- a/api/apps/file_app.py +++ b/api/apps/file_app.py @@ -332,7 +332,7 @@ def get(file_id): e, file = FileService.get_by_id(file_id) if not e: return get_data_error_result(retmsg="Document not found!") - b, n = File2DocumentService.get_minio_address(file_id=file_id) + b, n = File2DocumentService.get_storage_address(file_id=file_id) response = flask.make_response(STORAGE_IMPL.get(b, n)) ext = re.search(r"\.([^.]+)$", file.name) if ext: diff --git a/api/apps/sdk/doc.py b/api/apps/sdk/doc.py index ecc4b22b0cd..21b983466c7 100644 --- a/api/apps/sdk/doc.py +++ b/api/apps/sdk/doc.py @@ -286,7 +286,7 @@ def download_document(document_id,tenant_id): code=RetCode.ARGUMENT_ERROR) # The process of downloading - doc_id, doc_location = File2DocumentService.get_minio_address(doc_id=document_id) # minio address + doc_id, doc_location = File2DocumentService.get_storage_address(doc_id=document_id) # minio address file_stream = STORAGE_IMPL.get(doc_id, doc_location) if not file_stream: return construct_json_result(message="This file is empty.", code=RetCode.DATA_ERROR) @@ -373,7 +373,7 @@ def rm(tenant_id): if not tenant_id: return get_data_error_result(retmsg="Tenant not found!") - b, n = File2DocumentService.get_minio_address(doc_id=doc_id) + b, n = File2DocumentService.get_storage_address(doc_id=doc_id) if not DocumentService.remove_document(doc, tenant_id): return 
get_data_error_result( @@ -438,7 +438,7 @@ def run(tenant_id): e, doc = DocumentService.get_by_id(id) doc = doc.to_dict() doc["tenant_id"] = tenant_id - bucket, name = File2DocumentService.get_minio_address(doc_id=doc["id"]) + bucket, name = File2DocumentService.get_storage_address(doc_id=doc["id"]) queue_tasks(doc, bucket, name) return get_json_result(data=True) diff --git a/api/db/services/file2document_service.py b/api/db/services/file2document_service.py index 9280a030583..e04ed190cb7 100644 --- a/api/db/services/file2document_service.py +++ b/api/db/services/file2document_service.py @@ -69,7 +69,7 @@ def update_by_file_id(cls, file_id, obj): @classmethod @DB.connection_context() - def get_minio_address(cls, doc_id=None, file_id=None): + def get_storage_address(cls, doc_id=None, file_id=None): if doc_id: f2d = cls.get_by_document_id(doc_id) else: diff --git a/rag/svr/task_executor.py b/rag/svr/task_executor.py index 5273874e736..b25bbbea2f6 100644 --- a/rag/svr/task_executor.py +++ b/rag/svr/task_executor.py @@ -137,7 +137,7 @@ def collect(): return tasks -def get_minio_binary(bucket, name): +def get_storage_binary(bucket, name): return STORAGE_IMPL.get(bucket, name) @@ -155,8 +155,8 @@ def build(row): chunker = FACTORY[row["parser_id"].lower()] try: st = timer() - bucket, name = File2DocumentService.get_minio_address(doc_id=row["doc_id"]) - binary = get_minio_binary(bucket, name) + bucket, name = File2DocumentService.get_storage_address(doc_id=row["doc_id"]) + binary = get_storage_binary(bucket, name) cron_logger.info( "From minio({}) {}/{}".format(timer() - st, row["location"], row["name"])) except TimeoutError as e: