-
Notifications
You must be signed in to change notification settings - Fork 69
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Rag/improve file manage #393
base: main
Are you sure you want to change the base?
Changes from all commits
075c658
7bdeed6
d932ad2
7640161
bf77195
f44c3bd
134c9d6
db63373
7447213
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -13,6 +13,20 @@ | |
from .global_metadata import RAG_DOC_ID, RAG_DOC_PATH | ||
|
||
|
||
def gen_unique_filepaths(ori_filepath: str) -> str:
    """Return a file path derived from *ori_filepath* that does not exist yet.

    If nothing exists at ``ori_filepath`` it is returned unchanged. Otherwise a
    numeric suffix is inserted before the extension (``name_1.ext``,
    ``name_2.ext``, ...) and the first candidate that does not exist on disk
    is returned.

    Args:
        ori_filepath: The path the caller would like to write to.

    Returns:
        ``ori_filepath`` itself, or the first ``<name>_<n><ext>`` sibling path
        (n starting at 1) for which ``os.path.exists`` is false.
    """
    # No assert on non-existence here: an already-existing file is precisely
    # the case this helper handles (add a new file, never overwrite), and an
    # assert would turn that case into an AssertionError.
    if not os.path.exists(ori_filepath):
        return ori_filepath

    directory, filename = os.path.split(ori_filepath)
    name, ext = os.path.splitext(filename)
    # The directory/stem prefix is loop-invariant, so build it once.
    stem = os.path.join(directory, name)

    ct = 1
    new_filepath = f"{stem}_{ct}{ext}"
    while os.path.exists(new_filepath):
        ct += 1
        new_filepath = f"{stem}_{ct}{ext}"
    return new_filepath
|
||
|
||
class DocManager(lazyllm.ModuleBase): | ||
def __init__(self, dlm: DocListManager) -> None: | ||
super().__init__() | ||
|
@@ -54,6 +68,7 @@ def upload_files(self, files: List[UploadFile], override: bool = False, # noqa | |
return BaseResponse(code=400, msg=f'file [{files[idx].filename}]: {err_msg}', data=None) | ||
|
||
file_paths = [os.path.join(self._manager._path, user_path or '', file.filename) for file in files] | ||
file_paths = [gen_unique_filepaths(ele) for ele in file_paths] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 这个逻辑应该配合
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 不能覆盖。不覆盖，只新增 |
||
ids = self._manager.add_files(file_paths, metadatas=metadatas, status=DocListManager.Status.working) | ||
results = [] | ||
for file, path in zip(files, file_paths): | ||
|
@@ -73,7 +88,6 @@ def upload_files(self, files: List[UploadFile], override: bool = False, # noqa | |
except Exception as e: | ||
lazyllm.LOG.error(f'writing file [{path}] to disk failed: [{e}]') | ||
raise e | ||
|
||
file_id = gen_docid(path) | ||
self._manager.update_file_status([file_id], status=DocListManager.Status.success) | ||
results.append('Success') | ||
|
@@ -105,7 +119,7 @@ def add_files(self, files: List[str] = Body(...), | |
exist_id = exists_files_info.get(file, None) | ||
if exist_id: | ||
update_kws = dict(fileid=exist_id, status=DocListManager.Status.success) | ||
if metadatas: update_kws["metadata"] = json.dumps(metadatas[idx]) | ||
if metadatas: update_kws["meta"] = json.dumps(metadatas[idx]) | ||
self._manager.update_file_message(**update_kws) | ||
exist_ids.append(exist_id) | ||
id_mapping[file] = exist_id | ||
|
@@ -178,15 +192,11 @@ def delete_files(self, request: FileGroupRequest): | |
if request.group_name: | ||
return self.delete_files_from_group(request) | ||
else: | ||
self._manager.update_kb_group_file_status( | ||
file_ids=request.file_ids, status=DocListManager.Status.deleting) | ||
docs = self._manager.update_file_status(file_ids=request.file_ids, status=DocListManager.Status.deleting) | ||
|
||
for doc in docs: | ||
if os.path.exists(path := doc[1]): | ||
document_list = self._manager.delete_files(request.file_ids) | ||
for doc in document_list: | ||
print("DELETE doc:", doc.path) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. 删掉print |
||
if os.path.exists(path := doc.path): | ||
os.remove(path) | ||
|
||
self._manager.update_file_status(file_ids=request.file_ids, status=DocListManager.Status.deleted) | ||
return BaseResponse() | ||
except Exception as e: | ||
return BaseResponse(code=500, msg=str(e), data=None) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个逻辑应该过时了吧,应保持文件系统的内容和db一致,目前不应出现db中有,但文件系统中没有的情况
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
如果文件系统有,db没有,则应通过轮询加进去,保持Document数据表和文件系统一致
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
这个assert 应该删掉。但这个函数得留着,是http 前端服务在上传文件的时候用的,只新增不覆盖