Skip to content

Commit

Permalink
Remove `.doc` from supported processing types (infiniflow#488)
Browse files Browse the repository at this point in the history
### What problem does this PR solve?
infiniflow#474 

### Type of change

- [x] Bug Fix (non-breaking change which fixes an issue)
  • Loading branch information
KevinHuSh authored Apr 22, 2024
1 parent b052519 commit d2cfa0d
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion rag/app/book.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
pdf_parser = None
sections, tbls = [], []
- if re.search(r"\.docx?$", filename, re.IGNORECASE):
+ if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
doc_parser = DocxParser()
# TODO: table of contents need to be removed
Expand Down
2 changes: 1 addition & 1 deletion rag/app/laws.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"])
pdf_parser = None
sections = []
- if re.search(r"\.docx?$", filename, re.IGNORECASE):
+ if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
for txt in Docx()(filename, binary):
sections.append(txt)
Expand Down
2 changes: 1 addition & 1 deletion rag/app/naive.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,
res = []
pdf_parser = None
sections = []
- if re.search(r"\.docx?$", filename, re.IGNORECASE):
+ if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections, tbls = Docx()(filename, binary)
res = tokenize_table(tbls, doc, eng)
Expand Down
2 changes: 1 addition & 1 deletion rag/app/one.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000,

eng = lang.lower() == "english" # is_english(cks)

- if re.search(r"\.docx?$", filename, re.IGNORECASE):
+ if re.search(r"\.docx$", filename, re.IGNORECASE):
callback(0.1, "Start to parse.")
sections = [txt for txt in laws.Docx()(filename, binary) if txt]
callback(0.8, "Finish parsing.")
Expand Down

0 comments on commit d2cfa0d

Please sign in to comment.