Commit

Merge remote-tracking branch 'origin/COG-206' into COG-206
Vasilije1990 committed Aug 6, 2024
2 parents 73dd6c2 + 709a10c commit 0519986
Showing 41 changed files with 432 additions and 369 deletions.
58 changes: 39 additions & 19 deletions cognee/api/client.py
@@ -39,6 +39,7 @@ async def lifespan(app: FastAPI):
app = FastAPI(debug = os.getenv("ENV") != "prod", lifespan = lifespan)

origins = [
"http://127.0.0.1:3000",
"http://frontend:3000",
"http://localhost:3000",
"http://localhost:3001",
@@ -107,21 +108,33 @@ def health_check():
"""
return {"status": "OK"}

class Payload(BaseModel):
payload: Dict[str, Any]

@app.get("/datasets", response_model=list)
async def get_datasets():
from cognee.api.v1.datasets.datasets import datasets
return await datasets.list_datasets()
try:
from cognee.api.v1.datasets.datasets import datasets
datasets = await datasets.list_datasets()

return JSONResponse(
status_code = 200,
content = [{
"id": str(dataset.id),
"name": dataset.name,
"created_at": dataset.created_at,
"updated_at": dataset.updated_at,
"data": dataset.data,
} for dataset in datasets],
)
except Exception as error:
raise HTTPException(status_code = 500, detail=f"Error retrieving datasets: {str(error)}") from error
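Note (not part of the commit): JSONResponse serializes with plain json.dumps, so if created_at/updated_at come back as datetime objects the payload above would need explicit conversion. A minimal sketch of a JSON-safe serializer, under the assumption that those fields are datetimes:

from datetime import datetime

def serialize_dataset(dataset) -> dict:
    # isoformat() turns datetime columns into JSON-friendly strings;
    # values that are already strings pass through untouched.
    def stamp(value):
        return value.isoformat() if isinstance(value, datetime) else value

    return {
        "id": str(dataset.id),
        "name": dataset.name,
        "created_at": stamp(dataset.created_at),
        "updated_at": stamp(dataset.updated_at),
        "data": dataset.data,
    }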

@app.delete("/datasets/{dataset_id}", response_model=dict)
async def delete_dataset(dataset_id: str):
from cognee.api.v1.datasets.datasets import datasets
datasets.delete_dataset(dataset_id)
await datasets.delete_dataset(dataset_id)

return JSONResponse(
status_code=200,
content="OK",
status_code = 200,
content = "OK",
)
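A hypothetical usage sketch for the two endpoints above (assumes the server from this diff is running locally on the start_api_server default of port 8000; httpx is just a convenient client, not something this commit uses):

import asyncio
import httpx

async def main():
    async with httpx.AsyncClient(base_url = "http://localhost:8000") as client:
        response = await client.get("/datasets")
        response.raise_for_status()  # surfaces the 500 raised on failure above
        for dataset in response.json():
            print(dataset["id"], dataset["name"])
            # delete_dataset is now awaited server-side before "OK" comes back:
            # await client.delete(f"/datasets/{dataset['id']}")

asyncio.run(main())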

@app.get("/datasets/{dataset_id}/graph", response_model=list)
@@ -146,7 +159,7 @@ async def get_dataset_graph(dataset_id: str):
@app.get("/datasets/{dataset_id}/data", response_model=list)
async def get_dataset_data(dataset_id: str):
from cognee.api.v1.datasets.datasets import datasets
dataset_data = datasets.list_data(dataset_id)
dataset_data = await datasets.list_data(dataset_id)
if dataset_data is None:
raise HTTPException(status_code=404, detail=f"Dataset ({dataset_id}) not found.")
return [
@@ -162,17 +175,24 @@ async def get_dataset_data(dataset_id: str):
@app.get("/datasets/status", response_model=dict)
async def get_dataset_status(datasets: Annotated[List[str], Query(alias="dataset")] = None):
from cognee.api.v1.datasets.datasets import datasets as cognee_datasets
datasets_statuses = cognee_datasets.get_status(datasets)

return JSONResponse(
status_code = 200,
content = datasets_statuses,
)
try:
datasets_statuses = await cognee_datasets.get_status(datasets)

return JSONResponse(
status_code = 200,
content = datasets_statuses,
)
except Exception as error:
return JSONResponse(
status_code = 409,
content = {"error": str(error)}
)
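Failures from get_status now come back as HTTP 409 with an {"error": ...} body instead of an unhandled server error. A client-side sketch of handling that contract (the URL and port are assumptions):

import httpx

def fetch_statuses(dataset_names: list) -> dict:
    # alias="dataset" above means each name goes in a repeated ?dataset= param
    params = [("dataset", name) for name in dataset_names]
    response = httpx.get("http://localhost:8000/datasets/status", params = params)
    if response.status_code == 409:
        raise RuntimeError(response.json()["error"])
    response.raise_for_status()
    return response.json()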

@app.get("/datasets/{dataset_id}/data/{data_id}/raw", response_class=FileResponse)
async def get_raw_data(dataset_id: str, data_id: str):
from cognee.api.v1.datasets.datasets import datasets
dataset_data = datasets.list_data(dataset_id)
dataset_data = await datasets.list_data(dataset_id)
if dataset_data is None:
raise HTTPException(status_code=404, detail=f"Dataset ({dataset_id}) not found.")
data = [data for data in dataset_data if data["id"] == data_id][0]
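Note (not in this commit): the [0] lookup above raises an unhandled IndexError when data_id has no match. Inside the endpoint, next() with a default would let missing data return a 404 the same way a missing dataset does:

data = next((item for item in dataset_data if item["id"] == data_id), None)
if data is None:
    raise HTTPException(status_code = 404, detail = f"Data ({data_id}) not found in dataset ({dataset_id}).")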
@@ -312,10 +332,10 @@ def start_api_server(host: str = "0.0.0.0", port: int = 8000):
try:
logger.info("Starting server at %s:%s", host, port)

import asyncio
from cognee.modules.data.deletion import prune_system, prune_data
asyncio.run(prune_data())
asyncio.run(prune_system(metadata = True))
# import asyncio
# from cognee.modules.data.deletion import prune_system, prune_data
# asyncio.run(prune_data())
# asyncio.run(prune_system(metadata = True))

uvicorn.run(app, host = host, port = port)
except Exception as e:
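The destructive prune calls at startup are now commented out rather than removed. A hypothetical alternative (COGNEE_PRUNE_ON_START is an invented flag, not part of this commit) would gate them behind an environment variable so a development reset stays one setting away:

import asyncio
import os

from cognee.modules.data.deletion import prune_system, prune_data

if os.getenv("COGNEE_PRUNE_ON_START") == "true":
    asyncio.run(prune_data())                   # wipe ingested data
    asyncio.run(prune_system(metadata = True))  # wipe system state + metadata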
54 changes: 43 additions & 11 deletions cognee/api/v1/add/add.py
@@ -9,10 +9,11 @@
from cognee.modules.ingestion import get_matched_datasets, save_data_to_file
from cognee.shared.utils import send_telemetry
from cognee.base_config import get_base_config
from cognee.infrastructure.databases.relational import get_relational_config, create_db_and_tables
from cognee.modules.users.methods import create_default_user
from cognee.infrastructure.databases.relational import get_relational_config, get_relational_engine, create_db_and_tables
from cognee.modules.users.methods import create_default_user, get_default_user
from cognee.modules.users.permissions.methods import give_permission_on_document
from cognee.modules.users.models import User
from cognee.modules.data.operations.ensure_dataset_exists import ensure_dataset_exists

async def add(data: Union[BinaryIO, List[BinaryIO], str, List[str]], dataset_name: str = "main_dataset", user: User = None):
await create_db_and_tables()
@@ -99,6 +100,9 @@ async def add_files(file_paths: List[str], dataset_name: str, user):
destination = destination,
)

dataset_name = dataset_name.replace(" ", "_").replace(".", "_") if dataset_name is not None else "main_dataset"
dataset = await ensure_dataset_exists(dataset_name)
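The body of ensure_dataset_exists is not part of this diff. Given how it is used above, a get-or-create keyed on the normalized dataset name would fit; a sketch under that assumption (the Dataset model name is also assumed):

from sqlalchemy import select

from cognee.infrastructure.databases.relational import get_relational_engine
from cognee.modules.data.models import Dataset  # assumed sibling of Data

async def ensure_dataset_exists(dataset_name: str):
    db_engine = get_relational_engine()
    async with db_engine.get_async_session() as session:
        dataset = (await session.execute(
            select(Dataset).filter(Dataset.name == dataset_name)
        )).scalar_one_or_none()

        if dataset is None:
            dataset = Dataset(name = dataset_name, data = [])
            session.add(dataset)
            await session.commit()

        return dataset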

@dlt.resource(standalone = True, merge_key = "id")
async def data_resources(file_paths: str, user: User):
for file_path in file_paths:
@@ -107,16 +111,34 @@ async def data_resources(file_paths: str, user: User):

data_id = ingestion.identify(classified_data)

if user is None:
try:
user = await create_default_user()

await give_permission_on_document(user, data_id, "read")
await give_permission_on_document(user, data_id, "write")
except:
pass
file_metadata = classified_data.get_metadata()

from sqlalchemy import select
from cognee.modules.data.models import Data
db_engine = get_relational_engine()
async with db_engine.get_async_session() as session:
data = (await session.execute(
select(Data).filter(Data.id == data_id)
)).scalar_one_or_none()

if data is not None:
data.name = file_metadata["name"]
data.raw_data_location = file_metadata["file_path"]
data.extension = file_metadata["extension"]
data.mime_type = file_metadata["mime_type"]

await session.merge(data)
else:
data = Data(
name = file_metadata["name"],
raw_data_location = file_metadata["file_path"],
extension = file_metadata["extension"],
mime_type = file_metadata["mime_type"],
)
dataset.data.append(data)

await session.merge(dataset)
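The block above replaces the old permission-only bootstrap with a select-then-merge upsert. Condensed into one helper, under the same assumptions the diff makes (an async SQLAlchemy session and the Data model):

from sqlalchemy import select
from cognee.modules.data.models import Data

async def upsert_file_metadata(session, data_id, file_metadata, dataset):
    data = (await session.execute(
        select(Data).filter(Data.id == data_id)
    )).scalar_one_or_none()

    if data is not None:
        # row exists: refresh its columns in place
        data.name = file_metadata["name"]
        data.raw_data_location = file_metadata["file_path"]
        data.extension = file_metadata["extension"]
        data.mime_type = file_metadata["mime_type"]
        await session.merge(data)
    else:
        # new row: attach it through the dataset relationship so that
        # merging the dataset persists both in one pass
        dataset.data.append(Data(
            name = file_metadata["name"],
            raw_data_location = file_metadata["file_path"],
            extension = file_metadata["extension"],
            mime_type = file_metadata["mime_type"],
        ))

    await session.merge(dataset)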

yield {
"id": data_id,
"name": file_metadata["name"],
@@ -125,10 +147,20 @@ async def data_resources(file_paths: str, user: User):
"mime_type": file_metadata["mime_type"],
}

await give_permission_on_document(user, data_id, "read")
await give_permission_on_document(user, data_id, "write")


if user is None:
user = await get_default_user()

if user is None:
user = await create_default_user()

run_info = pipeline.run(
data_resources(processed_file_paths, user),
table_name = "file_metadata",
dataset_name = dataset_name.replace(" ", "_").replace(".", "_") if dataset_name is not None else "main_dataset",
dataset_name = dataset_name,
write_disposition = "merge",
)
send_telemetry("cognee.add")
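The resource is declared with merge_key = "id" and the run above uses write_disposition = "merge", so re-adding the same file updates its row instead of appending a duplicate. A minimal standalone dlt sketch of that behavior (the duckdb destination and names are assumptions):

import dlt

@dlt.resource(merge_key = "id")
def rows(name: str):
    yield {"id": "a", "name": name}

pipeline = dlt.pipeline(pipeline_name = "demo", destination = "duckdb", dataset_name = "main_dataset")
pipeline.run(rows("first.txt"), table_name = "file_metadata", write_disposition = "merge")
# a second run with the same merge key updates the row rather than appending:
pipeline.run(rows("renamed.txt"), table_name = "file_metadata", write_disposition = "merge")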