Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: new routes for docs, users, search #445

Merged
merged 6 commits into from
Aug 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 27 additions & 12 deletions agents-api/agents_api/autogen/Docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,7 @@ class BaseDocSearchRequest(BaseModel):
model_config = ConfigDict(
populate_by_name=True,
)
confidence: Annotated[float, Field(0.5, ge=0.0, le=1.0)]
"""
The confidence cutoff level
"""
alpha: Annotated[float, Field(0.75, ge=0.0, le=1.0)]
"""
The weight to apply to BM25 vs Vector search results. 0 => pure BM25; 1 => pure vector;
"""
mmr: bool = False
"""
Whether to include the MMR algorithm in the search. Optimizes for diversity in search results.
"""
limit: Annotated[int, Field(10, ge=1, le=100)]
lang: Literal["en-US"] = "en-US"
"""
The language to be used for text-only search. Support for other languages coming soon.
Expand Down Expand Up @@ -105,6 +94,20 @@ class DocReference(BaseModel):
distance: float | None = None


# Result of a document search: the matching document references together
# with the time the search took.
class DocSearchResponse(BaseModel):
    # populate_by_name=True lets pydantic populate fields by their attribute
    # name as well as by any alias.
    model_config = ConfigDict(
        populate_by_name=True,
    )
    docs: list[DocReference]
    """
    The documents that were found
    """
    # Validation requires a strictly positive value (gt=0.0).
    time: Annotated[float, Field(gt=0.0)]
    """
    The time taken to search in seconds
    """


class EmbedQueryRequest(BaseModel):
model_config = ConfigDict(
populate_by_name=True,
Expand All @@ -129,6 +132,14 @@ class HybridDocSearchRequest(BaseDocSearchRequest):
model_config = ConfigDict(
populate_by_name=True,
)
confidence: Annotated[float, Field(0.5, ge=0.0, le=1.0)]
"""
The confidence cutoff level
"""
alpha: Annotated[float, Field(0.75, ge=0.0, le=1.0)]
"""
The weight to apply to BM25 vs Vector search results. 0 => pure BM25; 1 => pure vector;
"""
text: str
"""
Text to use in the search. In `hybrid` search mode, either `text` or both `text` and `vector` fields are required.
Expand Down Expand Up @@ -161,6 +172,10 @@ class VectorDocSearchRequest(BaseDocSearchRequest):
model_config = ConfigDict(
populate_by_name=True,
)
confidence: Annotated[float, Field(0.5, ge=0.0, le=1.0)]
"""
The confidence cutoff level
"""
vector: list[float]
"""
Vector to use in the search. Must be the same dimensions as the embedding model or else an error will be thrown.
Expand Down
9 changes: 8 additions & 1 deletion agents-api/agents_api/autogen/openapi_model.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# ruff: noqa: F401, F403, F405
from typing import Annotated
from typing import Annotated, Generic, TypeVar
from uuid import UUID

from pydantic import AwareDatetime, Field
Expand Down Expand Up @@ -158,3 +158,10 @@ class UpdateTaskRequest(_UpdateTaskRequest):
"extra": "allow",
}
)


# Type variable for the element type carried by ListResponse; bound to
# BaseModel, so only pydantic models are accepted as the item type.
DataT = TypeVar("DataT", bound=BaseModel)


# Generic response envelope whose single field `items` is a list of DataT.
class ListResponse(BaseModel, Generic[DataT]):
    items: list[DataT]
2 changes: 1 addition & 1 deletion agents-api/agents_api/clients/temporal.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from uuid import UUID

from temporalio.client import Client, TLSConfig, WorkflowHandle
from temporalio.client import Client, TLSConfig

from agents_api.env import (
temporal_client_cert,
Expand Down
12 changes: 3 additions & 9 deletions agents-api/agents_api/models/agent/create_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,18 +43,12 @@ def create_agent(
Constructs and executes a datalog query to create a new agent in the database.
Parameters:
- agent_id (UUID): The unique identifier for the agent.
- agent_id (UUID | None): The unique identifier for the agent.
- developer_id (UUID): The unique identifier for the developer creating the agent.
- name (str): The name of the agent.
- about (str): A description of the agent.
- instructions (list[str], optional): A list of instructions for using the agent. Defaults to an empty list.
- model (str, optional): The model identifier for the agent. Defaults to "julep-ai/samantha-1-turbo".
- metadata (dict, optional): A dictionary of metadata for the agent. Defaults to an empty dict.
- default_settings (dict, optional): A dictionary of default settings for the agent. Defaults to an empty dict.
- client (CozoClient, optional): The CozoDB client instance to use for the query. Defaults to a preconfigured client instance.
- data (CreateAgentRequest): The data for the new agent.
Returns:
Agent: The newly created agent record.
- Agent: The newly created agent record.
"""

agent_id = agent_id or uuid4()
Expand Down
27 changes: 4 additions & 23 deletions agents-api/agents_api/models/docs/delete_doc.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
from typing import Literal
from uuid import UUID

from beartype import beartype
Expand All @@ -13,7 +12,6 @@
partialclass,
rewrap_exceptions,
verify_developer_id_query,
verify_developer_owns_resource_query,
wrap_in_class,
)

Expand All @@ -39,30 +37,25 @@
def delete_doc(
*,
developer_id: UUID,
owner_type: Literal["user", "agent"],
owner_id: UUID,
doc_id: UUID,
) -> tuple[list[str], dict]:
"""Constructs and returns a datalog query for deleting documents and associated information snippets.
This function targets the 'cozodb' database, allowing for the removal of documents and their related information snippets based on the provided document ID and owner (user or agent).
Parameters:
owner_type (Literal["user", "agent"]): The type of the owner, either 'user' or 'agent'.
owner_id (UUID): The UUID of the owner.
doc_id (UUID): The UUID of the document to be deleted.
client (CozoClient): An instance of the CozoClient to execute the query.
Returns:
pd.DataFrame: The result of the executed datalog query.
"""
# Convert UUID parameters to string format for use in the datalog query
owner_id = str(owner_id)
doc_id = str(doc_id)

# The following query is divided into two main parts:
# 1. Deleting information snippets associated with the document
# 2. Deleting the document itself from the owner's collection
# 2. Deleting the document itself
delete_snippets_query = """
# This section constructs the subquery for identifying and deleting all information snippets associated with the given document ID.
# Delete snippets
Expand All @@ -81,29 +74,17 @@ def delete_doc(
"""

delete_doc_query = """
# This section constructs the subquery for deleting the document from the specified owner's (user or agent) document collection.
# Delete the docs
?[doc_id, owner_id, owner_type] <- [[
to_uuid($doc_id),
to_uuid($owner_id),
$owner_type,
]]
?[doc_id] <- [[ to_uuid($doc_id) ]]
:delete docs {
doc_id,
owner_type,
owner_id,
}
:delete docs { doc_id }
:returning
"""

queries = [
verify_developer_id_query(developer_id),
verify_developer_owns_resource_query(
developer_id, f"{owner_type}s", **{f"{owner_type}_id": owner_id}
),
delete_snippets_query,
delete_doc_query,
]

return (queries, {"doc_id": doc_id, "owner_id": owner_id, "owner_type": owner_type})
return (queries, {"doc_id": doc_id})
8 changes: 1 addition & 7 deletions agents-api/agents_api/models/docs/get_doc.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
"""Module for retrieving document snippets from the CozoDB based on document IDs."""

from typing import Literal
from uuid import UUID

from beartype import beartype
Expand Down Expand Up @@ -38,14 +37,12 @@
def get_doc(
*,
developer_id: UUID,
owner_type: Literal["user", "agent"],
doc_id: UUID,
) -> tuple[list[str], dict]:
"""
Retrieves snippets of documents by their ID from the CozoDB.
Parameters:
owner_type (Literal["user", "agent"]): The type of the owner of the document.
doc_id (UUID): The unique identifier of the document.
client (CozoClient, optional): The CozoDB client instance. Defaults to a pre-configured client.
Expand All @@ -67,16 +64,13 @@ def get_doc(
snippet_data = [index, content]
?[
owner_type,
id,
title,
snippet_data,
created_at,
metadata,
] := input[id],
owner_type = $owner_type,
*docs {
owner_type,
doc_id: id,
title,
created_at,
Expand All @@ -90,4 +84,4 @@ def get_doc(
get_query,
]

return (queries, {"doc_id": doc_id, "owner_type": owner_type})
return (queries, {"doc_id": doc_id})
9 changes: 7 additions & 2 deletions agents-api/agents_api/models/docs/search_docs_hybrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@ def dbsf_fuse(
"""
all_docs = {doc.id: doc for doc in text_results + embedding_results}

text_scores: dict[UUID, float] = {doc.id: -doc.distance for doc in text_results}
# Every text-search hit must carry a distance before scores are derived from it.
# The check is on the distance alone: appending `in all_docs` would chain the
# comparison into `(distance is not None) and (None in all_docs)`.
assert all(doc.distance is not None for doc in text_results)

text_scores: dict[UUID, float] = {
doc.id: -(doc.distance or 0.0) for doc in text_results
}

# Because these are cosine distances, we need to invert them
embedding_scores: dict[UUID, float] = {
Expand Down Expand Up @@ -93,6 +97,7 @@ def search_docs_hybrid(
query: str,
query_embedding: list[float],
k: int = 3,
alpha: float = 0.7, # Weight of the embedding search results (this is a good default)
embed_search_options: dict = {},
text_search_options: dict = {},
**kwargs,
Expand All @@ -118,4 +123,4 @@ def search_docs_hybrid(
**kwargs,
)

return dbsf_fuse(text_results, embedding_results)[:k]
return dbsf_fuse(text_results, embedding_results, alpha)[:k]
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pycozo.client import QueryException
from pydantic import ValidationError

from ...autogen.openapi_model import Transition
from ..utils import (
cozo_query,
partialclass,
Expand Down
107 changes: 107 additions & 0 deletions agents-api/agents_api/models/user/delete_user.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,107 @@
"""
This module contains the implementation of the delete_user function, which is responsible for deleting a user and its related docs, tools, and default settings from the CozoDB database.
"""

from uuid import UUID

from beartype import beartype
from fastapi import HTTPException
from pycozo.client import QueryException
from pydantic import ValidationError

from ...autogen.openapi_model import ResourceDeletedResponse
from ...common.utils.datetime import utcnow
from ..utils import (
cozo_query,
partialclass,
rewrap_exceptions,
verify_developer_id_query,
verify_developer_owns_resource_query,
wrap_in_class,
)


@rewrap_exceptions(
    {
        # Each listed exception type is re-raised as an HTTPException with status 400.
        exc_type: partialclass(HTTPException, status_code=400)
        for exc_type in (QueryException, ValidationError, TypeError)
    }
)
@wrap_in_class(
    ResourceDeletedResponse,
    one=True,
    transform=lambda row: {
        "id": UUID(row.pop("user_id")),
        "deleted_at": utcnow(),
        "jobs": [],
    },
)
@cozo_query
@beartype
def delete_user(*, developer_id: UUID, user_id: UUID) -> tuple[list[str], dict]:
    """
    Builds the datalog queries that remove a user together with its doc links, tool rows and default settings.

    The undecorated function only assembles the queries and their parameters.
    Running them and converting the result is presumably handled by the
    `cozo_query` and `wrap_in_class` decorators (their implementation is not
    shown here).

    Parameters:
    - developer_id (UUID): The UUID of the developer owning the user.
    - user_id (UUID): The UUID of the user to be deleted.

    Returns:
    - tuple[list[str], dict]: The ordered list of queries and the parameter dict
      (`user_id` and `developer_id`, both as strings) they are run with.
    """

    # Both identifiers are handed to the queries as plain strings.
    query_params = {"user_id": str(user_id), "developer_id": str(developer_id)}

    # Guard queries: the developer must exist and must own the user.
    developer_check_query = verify_developer_id_query(developer_id)
    ownership_check_query = verify_developer_owns_resource_query(
        developer_id, "users", user_id=user_id
    )

    delete_user_docs_query = """
# Delete docs
?[user_id, doc_id] :=
*user_docs{
user_id,
doc_id,
}, user_id = to_uuid($user_id)
:delete user_docs {
user_id,
doc_id
}
:returning
"""

    delete_tools_query = """
# Delete tools
?[user_id, tool_id] :=
*tools{
user_id,
tool_id,
}, user_id = to_uuid($user_id)
:delete tools {
user_id,
tool_id
}
:returning
"""

    delete_settings_query = """
# Delete default user settings
?[user_id] <- [[$user_id]]
:delete user_default_settings {
user_id
}
:returning
"""

    delete_user_query = """
# Delete the user
?[user_id, developer_id] <- [[$user_id, $developer_id]]
:delete users {
developer_id,
user_id
}
:returning
"""

    # Order matters: checks first, dependent rows next, the user row last.
    ordered_queries = [
        developer_check_query,
        ownership_check_query,
        delete_user_docs_query,
        delete_tools_query,
        delete_settings_query,
        delete_user_query,
    ]

    return (ordered_queries, query_params)
Loading
Loading