From 01b32bc9bdd78252eef84e56d84561b68f16bc02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=8A=A0=E5=B8=86?= Date: Mon, 22 Apr 2024 15:36:59 +0800 Subject: [PATCH] =?UTF-8?q?.doc=20file=20is=20not=20support=20=EF=BC=8C=20?= =?UTF-8?q?fix=20regular=20expression=20=EF=BC=8Cthen=20message=20can=20be?= =?UTF-8?q?=20alert?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rag/app/laws.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/app/laws.py b/rag/app/laws.py index 947e4dc408..9b77b4fb70 100644 --- a/rag/app/laws.py +++ b/rag/app/laws.py @@ -93,7 +93,7 @@ def chunk(filename, binary=None, from_page=0, to_page=100000, doc["title_sm_tks"] = huqie.qieqie(doc["title_tks"]) pdf_parser = None sections = [] - if re.search(r"\.docx?$", filename, re.IGNORECASE): + if re.search(r"\.docx$", filename, re.IGNORECASE): callback(0.1, "Start to parse.") for txt in Docx()(filename, binary): sections.append(txt)