Skip to content

Commit

Permalink
Merge branch 'main' into 625-featapi-implement-translations-endpoint
Browse files Browse the repository at this point in the history
  • Loading branch information
CollectiveUnicorn authored Jul 11, 2024
2 parents 58e2e20 + 13cd46f commit 0716dac
Show file tree
Hide file tree
Showing 104 changed files with 4,074 additions and 1,015 deletions.
2 changes: 1 addition & 1 deletion packages/ui/zarf.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ constants:
variables:
- name: LEAPFROGAI_API_BASE_URL #LEAPFROGAI_API_BASE_URL
description: The base URL for the LeapfrogAI API
default: http://api.leapfrogai.svc.cluster.local:8080/openai/v1
default: http://api.leapfrogai.svc.cluster.local:8080
prompt: true
sensitive: true
- name: OPENAI_API_KEY
Expand Down
6 changes: 4 additions & 2 deletions src/leapfrogai_api/backend/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@

import time
import uuid
from typing import BinaryIO, Iterator, AsyncGenerator, Any
import grpc
from typing import BinaryIO, Iterator, AsyncGenerator, Any
import leapfrogai_sdk as lfai
from leapfrogai_api.backend.types import (
ChatCompletionResponse,
Expand Down Expand Up @@ -108,8 +108,10 @@ def read_chunks(file: BinaryIO, chunk_size: int) -> Iterator[lfai.AudioRequest]:
yield lfai.AudioRequest(chunk_data=chunk)


# helper function used to modify objects unless certain fields are missing
def object_or_default(obj: Any | None, _default: Any) -> Any:
if obj:
"""Returns the given object unless it is a None type, otherwise a given default is returned"""
if obj is not None:
return obj
else:
return _default
5 changes: 5 additions & 0 deletions src/leapfrogai_api/backend/rag/document_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
TextLoader,
UnstructuredHTMLLoader,
UnstructuredMarkdownLoader,
UnstructuredPowerPointLoader,
UnstructuredExcelLoader,
)
from langchain_core.documents import Document
Expand All @@ -24,6 +25,8 @@
"text/markdown": UnstructuredMarkdownLoader,
"application/msword": Docx2txtLoader,
"application/vnd.openxmlformats-officedocument.wordprocessingml.document": Docx2txtLoader,
"application/vnd.openxmlformats-officedocument.presentationml.presentation": UnstructuredPowerPointLoader,
"application/vnd.ms-powerpoint": UnstructuredPowerPointLoader,
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet": UnstructuredExcelLoader,
"xls:application/vnd.ms-excel": UnstructuredExcelLoader,
}
Expand All @@ -40,6 +43,8 @@
".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
".xls": "xls:application/vnd.ms-excel",
".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
".ppt": "application/vnd.ms-powerpoint",
}


Expand Down
185 changes: 181 additions & 4 deletions src/leapfrogai_api/backend/rag/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,19 +2,27 @@

import logging
import tempfile
import time

from fastapi import UploadFile

from fastapi import HTTPException, UploadFile, status
from langchain_core.documents import Document
from langchain_core.embeddings import Embeddings
from openai.types.beta.vector_store import FileCounts, VectorStore
from openai.types.beta.vector_stores import VectorStoreFile
from openai.types.beta.vector_stores.vector_store_file import LastError
from supabase_py_async import AsyncClient
from leapfrogai_api.backend.rag.document_loader import load_file, split
from leapfrogai_api.backend.rag.leapfrogai_embeddings import LeapfrogAIEmbeddings
from leapfrogai_api.backend.types import VectorStoreFileStatus
from leapfrogai_api.data.crud_file_bucket import CRUDFileBucket
from leapfrogai_api.data.crud_file_object import CRUDFileObject, FilterFileObject
from leapfrogai_api.data.crud_vector_store import CRUDVectorStore, FilterVectorStore
from leapfrogai_api.backend.types import (
VectorStoreStatus,
VectorStoreFileStatus,
CreateVectorStoreRequest,
ModifyVectorStoreRequest,
)
from leapfrogai_api.data.crud_vector_store_file import (
CRUDVectorStoreFile,
FilterVectorStoreFile,
Expand Down Expand Up @@ -117,17 +125,172 @@ async def index_file(self, vector_store_id: str, file_id: str) -> VectorStoreFil
await crud_vector_store_file.update(
id_=vector_store_file.id, object_=vector_store_file
)
except Exception as e:
except Exception as exc:
vector_store_file.status = VectorStoreFileStatus.FAILED.value
await crud_vector_store_file.update(
id_=vector_store_file.id, object_=vector_store_file
)
raise e
raise exc

return await crud_vector_store_file.get(
filters=FilterVectorStoreFile(vector_store_id=vector_store_id, id=file_id)
)

async def index_files(
    self, vector_store_id: str, file_ids: list[str]
) -> list[VectorStoreFile]:
    """Index a list of files into a vector store.

    Args:
        vector_store_id: ID of the vector store to index the files into.
        file_ids: IDs of previously uploaded files to index.

    Returns:
        The VectorStoreFile record for every file that was indexed.
        Files that were already indexed are skipped and omitted from
        the result.

    Raises:
        Exception: any error propagated from ``index_file`` other than
            ``FileAlreadyIndexedError``.
    """
    responses = []
    for file_id in file_ids:
        try:
            response = await self.index_file(
                vector_store_id=vector_store_id, file_id=file_id
            )
            responses.append(response)
        except FileAlreadyIndexedError:
            # Re-indexing an already-indexed file is expected and non-fatal.
            logging.info("File %s already exists and cannot be re-indexed", file_id)
            continue
        # Other exceptions propagate naturally; the previous
        # `except Exception as exc: raise exc` was a no-op that only
        # rewrote the traceback origin.

    return responses

async def create_new_vector_store(
    self, request: CreateVectorStoreRequest
) -> VectorStore:
    """Create a new vector store and index any requested files into it.

    The store is created with status ``in_progress``, the requested files
    (if any) are indexed, and the store is then persisted as ``completed``.
    Any failure is surfaced to the caller as an HTTP 400.
    """
    crud = CRUDVectorStore(db=self.db)

    now = int(time.time())
    expires_after, expires_at = request.get_expiry(now)

    try:
        new_store = await crud.create(
            object_=VectorStore(
                id="",  # Blank so Postgres generates a UUID
                usage_bytes=0,  # Automatically calculated by DB
                created_at=0,  # Blank so Postgres generates a timestamp
                file_counts=FileCounts(
                    cancelled=0, completed=0, failed=0, in_progress=0, total=0
                ),
                last_active_at=now,  # Set to current time
                metadata=request.metadata,
                name=request.name,
                object="vector_store",
                status=VectorStoreStatus.IN_PROGRESS.value,
                expires_after=expires_after,
                expires_at=expires_at,
            )
        )

        if request.file_ids != []:
            for file_response in await self.index_files(
                new_store.id, request.file_ids
            ):
                await self._increment_vector_store_file_status(
                    new_store, file_response
                )

        new_store.status = VectorStoreStatus.COMPLETED.value

        return await crud.update(id_=new_store.id, object_=new_store)
    except Exception as exc:
        logging.error(exc)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Unable to parse vector store request",
        ) from exc

async def modify_existing_vector_store(
    self,
    vector_store_id: str,
    request: ModifyVectorStoreRequest,
) -> VectorStore:
    """Modify an existing vector store given its id.

    Looks up the current store, marks it ``in_progress`` for the duration
    of the update, indexes any newly requested files, then persists the
    final state with refreshed activity/expiry timestamps.

    Args:
        vector_store_id: ID of the vector store to modify.
        request: The fields to change; ``file_ids``, when provided, are
            indexed into the store.

    Returns:
        The updated VectorStore record.

    Raises:
        HTTPException: 404 if the store does not exist; 400 if the update
            or indexing fails.
    """
    crud_vector_store = CRUDVectorStore(db=self.db)

    if not (
        old_vector_store := await crud_vector_store.get(
            filters=FilterVectorStore(id=vector_store_id)
        )
    ):
        raise HTTPException(
            status_code=status.HTTP_404_NOT_FOUND,
            detail="Vector store not found",
        )

    try:
        new_vector_store = VectorStore(
            id=vector_store_id,
            usage_bytes=old_vector_store.usage_bytes,  # Automatically calculated by DB
            created_at=old_vector_store.created_at,
            file_counts=old_vector_store.file_counts,
            last_active_at=old_vector_store.last_active_at,  # Update after indexing files
            metadata=getattr(request, "metadata", old_vector_store.metadata),
            name=getattr(request, "name", old_vector_store.name),
            object="vector_store",
            status=VectorStoreStatus.IN_PROGRESS.value,
            expires_after=old_vector_store.expires_after,
            expires_at=old_vector_store.expires_at,
        )

        await crud_vector_store.update(
            id_=vector_store_id,
            object_=new_vector_store,
        )  # Sets status to in_progress for the duration of this function

        if request.file_ids:
            responses = await self.index_files(
                new_vector_store.id, request.file_ids
            )
            for response in responses:
                await self._increment_vector_store_file_status(
                    new_vector_store, response
                )

        new_vector_store.status = VectorStoreStatus.COMPLETED.value

        last_active_at = int(time.time())
        new_vector_store.last_active_at = (
            last_active_at  # Update after indexing files
        )
        expires_after, expires_at = request.get_expiry(last_active_at)

        # Fix: the guard previously read `if expires_at and expires_at:` —
        # testing the same variable twice — so `expires_after` was never
        # checked before both expiry fields were overwritten. Both halves
        # of the pair returned by get_expiry must be present.
        if expires_after and expires_at:
            new_vector_store.expires_after = expires_after
            new_vector_store.expires_at = expires_at

        return await crud_vector_store.update(
            id_=vector_store_id,
            object_=new_vector_store,
        )
    except Exception as exc:
        logging.error(exc)
        raise HTTPException(
            status_code=status.HTTP_400_BAD_REQUEST,
            detail="Unable to parse vector store request",
        ) from exc

async def file_ids_are_valid(self, file_ids: str | list[str]) -> bool:
    """Return True when every provided file id resolves to an existing file object.

    Accepts either a single id or a list of ids; any lookup failure marks
    the whole set as invalid.
    """
    crud = CRUDFileObject(db=self.db)

    # Normalize a lone id into a one-element list.
    ids = file_ids if isinstance(file_ids, list) else [file_ids]

    try:
        for current_id in ids:
            await crud.get(filters=FilterFileObject(id=current_id))
    except Exception:
        # Treat any lookup error as "ids are not valid".
        return False

    return True

async def adelete_file(self, vector_store_id: str, file_id: str) -> bool:
"""Delete a file from the vector store.
Expand Down Expand Up @@ -217,6 +380,20 @@ async def asimilarity_search(self, query: str, vector_store_id: str, k: int = 4)

return response

async def _increment_vector_store_file_status(
    self, vector_store: VectorStore, file_response: VectorStoreFile
):
    """Increment the file count of a given vector store based on the file response.

    The counter matching the file's status is bumped when the status is
    recognized; ``total`` is incremented unconditionally.
    """
    counts = vector_store.file_counts

    # Map each known status value to the name of its counter field.
    status_to_field = {
        VectorStoreFileStatus.COMPLETED.value: "completed",
        VectorStoreFileStatus.FAILED.value: "failed",
        VectorStoreFileStatus.IN_PROGRESS.value: "in_progress",
        VectorStoreFileStatus.CANCELLED.value: "cancelled",
    }

    field = status_to_field.get(file_response.status)
    if field is not None:
        setattr(counts, field, getattr(counts, field) + 1)

    counts.total += 1

async def _adelete_vector(
self,
vector_store_id: str,
Expand Down
74 changes: 2 additions & 72 deletions src/leapfrogai_api/backend/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@

from fastapi import UploadFile, Form, File
from openai.types import FileObject
from openai.types.beta import Assistant, AssistantTool
from openai.types.beta import Assistant
from openai.types.beta import VectorStore
from openai.types.beta.assistant import (
ToolResources as BetaAssistantToolResources,
ToolResourcesFileSearch,
)
from openai.types.beta.assistant_tool import FileSearchTool

from openai.types.beta.thread import ToolResources as BetaThreadToolResources
from openai.types.beta.thread_create_params import (
ToolResourcesFileSearchVectorStoreChunkingStrategy,
Expand Down Expand Up @@ -525,72 +521,6 @@ class ListFilesResponse(BaseModel):
#############


class CreateAssistantRequest(BaseModel):
    """Request object for creating an assistant.

    Field types for tools and tool resources come from the ``openai``
    SDK's beta assistant types; every field except ``model`` is optional
    or carries a default.
    """

    model: str = Field(
        default="llama-cpp-python",
        examples=["llama-cpp-python"],
        description="The model to be used by the assistant. Default is 'llama-cpp-python'.",
    )
    name: str | None = Field(
        default=None,
        examples=["Froggy Assistant"],
        description="The name of the assistant. Optional.",
    )
    description: str | None = Field(
        default=None,
        examples=["A helpful assistant."],
        description="A description of the assistant's purpose. Optional.",
    )
    instructions: str | None = Field(
        default=None,
        examples=["You are a helpful assistant."],
        description="Instructions that the assistant should follow. Optional.",
    )
    tools: list[AssistantTool] | None = Field(
        default=None,
        examples=[[FileSearchTool(type="file_search")]],
        description="List of tools the assistant can use. Optional.",
    )
    tool_resources: BetaAssistantToolResources | None = Field(
        default=None,
        examples=[
            BetaAssistantToolResources(
                file_search=ToolResourcesFileSearch(vector_store_ids=[])
            )
        ],
        description="Resources for the tools used by the assistant. Optional.",
    )
    metadata: dict | None = Field(
        default={},
        examples=[{}],
        description="Additional metadata for the assistant. Optional.",
    )
    temperature: float | None = Field(
        default=None,
        examples=[1.0],
        description="Sampling temperature for the model. Optional.",
    )
    top_p: float | None = Field(
        default=None,
        examples=[1.0],
        description="Nucleus sampling parameter. Optional.",
    )
    response_format: Literal["auto"] | None = Field(
        default=None,
        examples=["auto"],
        description="The format of the assistant's responses. Currently only 'auto' is supported. Optional.",
    )


class ModifyAssistantRequest(CreateAssistantRequest):
    """Request object for modifying an assistant.

    Inherits all fields from CreateAssistantRequest; all fields are
    optional for modification, so only the supplied values change.
    """

    # Inherits all fields from CreateAssistantRequest
    # All fields are optional for modification


class ListAssistantsResponse(BaseModel):
"""Response object for listing assistants."""

Expand Down
4 changes: 2 additions & 2 deletions src/leapfrogai_api/data/crud_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,6 @@ async def delete(self, filters: dict | None = None) -> bool:
result = await query.execute()

try:
return True if result.data else None
return True if result.data else False
except Exception:
return None
return False
2 changes: 1 addition & 1 deletion src/leapfrogai_api/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ dependencies = [
"supabase-py-async >= 2.5.6",
"langchain >= 0.2.1",
"langchain-community >= 0.2.1",
"unstructured[md,xlsx] >= 0.14.2", # Only specify necessary filetypes to prevent package bloat (e.g. 130MB vs 6GB)
"unstructured[md,xlsx,pptx] >= 0.14.2", # Only specify necessary filetypes to prevent package bloat (e.g. 130MB vs 6GB)
"pylibmagic >= 0.5.0", # Resolves issue with libmagic not being bundled with OS - https://github.com/ahupp/python-magic/issues/233, may not be needed after this is merged https://github.com/ahupp/python-magic/pull/294
"python-magic >= 0.4.27",
"openpyxl >= 3.1.5",
Expand Down
Loading

0 comments on commit 0716dac

Please sign in to comment.