Skip to content

Commit

Permalink
Fix parser selection for pptx files that come from the file manager (#2482)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?

#2467

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
  • Loading branch information
KevinHuSh authored Sep 18, 2024
1 parent 2b0dc01 commit 2324b88
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 11 deletions.
3 changes: 1 addition & 2 deletions api/apps/file2document_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,14 @@ def convert():
doc = DocumentService.insert({
"id": get_uuid(),
"kb_id": kb.id,
"parser_id": kb.parser_id,
"parser_id": FileService.get_parser(file.type, file.name, kb.parser_id),
"parser_config": kb.parser_config,
"created_by": current_user.id,
"type": file.type,
"name": file.name,
"location": file.location,
"size": file.size
})
FileService.set_constant_parser(doc, file.name)
file2document = File2DocumentService.insert({
"id": get_uuid(),
"file_id": id,
Expand Down
18 changes: 9 additions & 9 deletions api/db/services/file_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -357,7 +357,7 @@ def upload_document(self, kb, file_objs, user_id):
doc = {
"id": get_uuid(),
"kb_id": kb.id,
"parser_id": kb.parser_id,
"parser_id": self.get_parser(filetype, filename, kb.parser_id),
"parser_config": kb.parser_config,
"created_by": user_id,
"type": filetype,
Expand All @@ -366,7 +366,6 @@ def upload_document(self, kb, file_objs, user_id):
"size": len(blob),
"thumbnail": thumbnail(filename, blob)
}
self.set_constant_parser(doc, filename)
DocumentService.insert(doc)

FileService.add_file_from_kb(doc, kb_folder["id"], kb.tenant_id)
Expand All @@ -377,12 +376,13 @@ def upload_document(self, kb, file_objs, user_id):
return err, files

@staticmethod
def set_constant_parser(doc, filename):
if doc["type"] == FileType.VISUAL:
doc["parser_id"] = ParserType.PICTURE.value
if doc["type"] == FileType.AURAL:
doc["parser_id"] = ParserType.AUDIO.value
def get_parser(doc_type, filename, default):
if doc_type == FileType.VISUAL:
return ParserType.PICTURE.value
if doc_type == FileType.AURAL:
return ParserType.AUDIO.value
if re.search(r"\.(ppt|pptx|pages)$", filename):
doc["parser_id"] = ParserType.PRESENTATION.value
return ParserType.PRESENTATION.value
if re.search(r"\.(eml)$", filename):
doc["parser_id"] = ParserType.EMAIL.value
return ParserType.EMAIL.value
return default

0 comments on commit 2324b88

Please sign in to comment.