Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

HBAI-194: Support manual pasted text for embedding doc #26

Merged
1 commit merged on Mar 5, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion backends/embedding/embedding.py
Original file line number Diff line number Diff line change
Expand Up @@ -394,7 +394,8 @@ def create_embedding(
flush=True,
)
for ichunk, ch in enumerate(chunks):
print(f"[embedding api] Chunk ({ichunk}):\n\n{ch}")
# .encode() prevents crash on undefined/unmapped string chars
print(f"[embedding api] Chunk ({ichunk}):\n\n{ch.get_content().encode('utf-8')}", flush=True)

# Create document embeddings from chunks
service_context = ServiceContext.from_defaults(
Expand Down
12 changes: 10 additions & 2 deletions backends/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -433,13 +433,14 @@ async def create_memory(
description = form.description
tags = common.parse_valid_tags(form.tags)
url_path = form.urlPath
text_input = form.textInput
tmp_input_file_path = ""
chunk_size = form.chunkSize
chunk_overlap = form.chunkOverlap
chunk_strategy = form.chunkStrategy

if file == None and url_path == "":
raise Exception("You must supply a file upload or url.")
if file == None and url_path == "" and text_input == "":
raise Exception("You must supply a file upload, url or text.")
if not document_name or not collection_name:
raise Exception("You must supply a collection and memory name.")
if tags == None:
Expand All @@ -461,6 +462,13 @@ async def create_memory(
os.makedirs(tmp_folder)
# Download the file and save to disk
await common.get_file_from_url(url_path, tmp_input_file_path, app)
elif text_input:
print(f"[homebrew api] Saving raw text to file...\n{text_input}")
if not os.path.exists(tmp_folder):
os.makedirs(tmp_folder)
# Write to file
with open(tmp_input_file_path, "w") as f:
f.write(text_input)
elif file:
print("[homebrew api] Saving uploaded file to disk...")
# Read the uploaded file in chunks of 1mb,
Expand Down
1 change: 1 addition & 0 deletions backends/server/classes.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,6 +315,7 @@ class EmbedDocumentRequest(BaseModel):
description: Optional[str] = ""
tags: Optional[str] = ""
urlPath: Optional[str] = ""
textInput: Optional[str] = ""
chunkSize: Optional[int] = None
chunkOverlap: Optional[int] = None
chunkStrategy: Optional[str] = None
Expand Down