Skip to content

Commit

Permalink
clean string
Browse files Browse the repository at this point in the history
  • Loading branch information
AlisoSouza committed Nov 22, 2024
1 parent cf45c11 commit d8fc111
Showing 1 changed file with 12 additions and 2 deletions.
14 changes: 12 additions & 2 deletions nexus/logs/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@ class Message(models.Model):

def __str__(self) -> str:
    """Return a short label of the form ``<status> - <contact_urn>``."""
    label = f"{self.status} - {self.contact_urn}"
    return label

def clean_string(self, s: str) -> str:
    """Normalize text for loose, formatting-insensitive substring matching.

    The text is lowercased, decomposed with NFKD so accented letters lose
    their diacritics, reduced to ASCII, stripped of ASCII punctuation, and
    finally has every run of whitespace collapsed to a single space with
    leading/trailing whitespace removed.

    Args:
        s: Text to normalize. ``None`` or an empty string yields ``""``.

    Returns:
        The normalized ASCII string.
    """
    import string
    import unicodedata

    # A missing value (e.g. a dict ``.get`` that returned None) normalizes
    # to the empty string instead of raising AttributeError.
    if not s:
        return ""

    s = s.lower()
    # NFKD splits accented characters into base letter + combining mark;
    # the ASCII round-trip then drops the marks and any other non-ASCII.
    s = unicodedata.normalize("NFKD", s).encode("ascii", "ignore").decode("ascii")
    s = s.translate(str.maketrans("", "", string.punctuation))
    # Collapse whitespace LAST: removing punctuation (and NFKD expansion of
    # spacing accents) can leave doubled or dangling spaces, e.g. "a - b"
    # would otherwise become "a  b" and fail to match "a b".
    return " ".join(s.split())

@property
def groundedness_details(self):
Expand All @@ -52,8 +60,10 @@ def groundedness_details(self):
"score": sentence.get("score"),
}
for chunk in self.messagelog.chunks_json:
evidence: str = sentence.get("evidence", "").strip('"')
if evidence.lower() in chunk.get("full_page").lower():
sentence = sentence.get("evidence", "")
clean_evidence: str = self.clean_string(sentence)
clean_chunk = self.clean_string(chunk.get("full_page"))
if clean_evidence in clean_chunk:
sentence_stats["sources"].append(
{
"filename": chunk.get("filename"),
Expand Down

0 comments on commit d8fc111

Please sign in to comment.