Skip to content

Commit

Permalink
Merge pull request #32 from weni-ai/feature/no-lower
Browse files Browse the repository at this point in the history
Remove strings lowering process
  • Loading branch information
johncordeiro authored Apr 8, 2024
2 parents 64c4a2d + 857d6aa commit a2e4ed2
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 13 deletions.
2 changes: 1 addition & 1 deletion app/handlers/content_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def delete_batch(self):
def search(self, request: ContentBaseSearchRequest, Authorization: Annotated[str | None, Header()] = None):
token_verification(Authorization)
response = self.content_base_indexer.search(
- search=request.search.lower(),
+ search=request.search,
threshold=request.threshold,
filter=request.filter
)
Expand Down
2 changes: 1 addition & 1 deletion app/indexer/content_bases.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ def index(self, texts: List, metadatas: dict):
self.storage.delete(ids=ids)

docs = [
- Document(page_content=text.lower(), metadata=metadatas)
+ Document(page_content=text, metadata=metadatas)
for text in texts
]

Expand Down
14 changes: 7 additions & 7 deletions app/loaders/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ def raw_text(self) -> str:
for i, page in enumerate(pages):
text = page.page_content
if text:
- raw_text += text.lower()
+ raw_text += text
return raw_text


Expand Down Expand Up @@ -85,7 +85,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]:
pages = self.load()
split_pages = []
for page in pages:
- page_content = page.page_content.lower()
+ page_content = page.page_content
metadatas = page.metadata
metadatas.update({"full_page": page_content})

Expand Down Expand Up @@ -118,7 +118,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]:
split_pages = []

for page in pages:
- page_content = page.page_content.lower()
+ page_content = page.page_content
metadatas = page.metadata
metadatas.update({"full_page": page_content})

Expand All @@ -134,7 +134,7 @@ def raw_text(self) -> str:
for i, page in enumerate(pages):
text = page.page_content
if text:
- raw_text += text.lower()
+ raw_text += text
return raw_text


Expand All @@ -155,7 +155,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]:
pages = self.load()
split_pages = []
for page in pages:
- page_content = page.page_content.lower()
+ page_content = page.page_content
metadatas = page.metadata
metadatas.update({"full_page": page_content})

Expand Down Expand Up @@ -202,7 +202,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]:
pages = self.load()
split_pages = []
for page in pages:
- page_content = page.page_content.lower()
+ page_content = page.page_content
metadatas = page.metadata
metadatas.update({"full_page": page_content})

Expand Down Expand Up @@ -230,7 +230,7 @@ def load_and_split_text(self, text_splitter: ITextSplitter) -> List[Document]:

pages = self.loader.load_and_split()
for page in pages:
- page_content = page.page_content.lower()
+ page_content = page.page_content
metadatas = page.metadata
metadatas.update({"full_page": page_content})

Expand Down
8 changes: 4 additions & 4 deletions app/tests/test_document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -98,25 +98,25 @@ def test_load_pdf(self):
file_path = f'{self.path}/{self.file_name}.pdf'
data_loader = DataLoader(pdf_loader, file_path)
raw_text = data_loader.raw_text()
- self.assertEqual(raw_text, self.text_string.lower())
+ self.assertEqual(raw_text, self.text_string)

def test_load_txt(self):
file_path = f'{self.path}/{self.file_name}.txt'
data_loader = DataLoader(txt_loader, file_path)
raw_text = data_loader.raw_text()
- self.assertEqual(raw_text, self.text_string.lower())
+ self.assertEqual(raw_text, self.text_string)

def test_load_udocx(self):
file_path = f'{self.path}/{self.file_name}.docx'
data_loader = DataLoader(u_docx_loader, file_path)
raw_text = data_loader.raw_text()
- self.assertEqual(raw_text, self.text_string.lower())
+ self.assertEqual(raw_text, self.text_string)

def test_load_docx(self):
file_path = f'{self.path}/{self.file_name}.docx'
data_loader = DataLoader(docx_loader, file_path)
raw_text = data_loader.raw_text()
- self.assertEqual(raw_text, self.text_string.lower())
+ self.assertEqual(raw_text, self.text_string)

def test_load_xlsx(self):
file_path = f'{self.path}/{self.file_name}.xlsx'
Expand Down

0 comments on commit a2e4ed2

Please sign in to comment.