From 092a4a3c5adb8f53ee98da0672050d5b2d8520ae Mon Sep 17 00:00:00 2001 From: Kevin Hu Date: Tue, 3 Sep 2024 13:35:13 +0800 Subject: [PATCH] enable fine-grained tokenization for 3-char words --- rag/nlp/query.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rag/nlp/query.py b/rag/nlp/query.py index ccd1a5af2c7..a8d2d0b5968 100644 --- a/rag/nlp/query.py +++ b/rag/nlp/query.py @@ -83,7 +83,7 @@ def question(self, txt, tbl="qa", min_match="60%"): ), tks def need_fine_grained_tokenize(tk): - if len(tk) < 4: + if len(tk) < 3: return False if re.match(r"[0-9a-z\.\+#_\*-]+$", tk): return False