Skip to content

Commit

Permalink
Merge branch 'dev' into COG-970-refactor-tokenizing
Browse files Browse the repository at this point in the history
  • Loading branch information
dexters1 committed Jan 24, 2025
2 parents 902979c + 72e332f commit 77a7285
Show file tree
Hide file tree
Showing 12 changed files with 211 additions and 29 deletions.
40 changes: 20 additions & 20 deletions .github/workflows/profiling.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,32 +68,32 @@ jobs:
echo "HEAD_SHA=${{ github.event.pull_request.head.sha }}" >> $GITHUB_ENV
# Run profiler on the base branch
- name: Run profiler on base branch
env:
BASE_SHA: ${{ env.BASE_SHA }}
run: |
echo "Profiling the base branch for code_graph_pipeline.py"
echo "Checking out base SHA: $BASE_SHA"
git checkout $BASE_SHA
echo "This is the working directory: $PWD"
# Ensure the script is executable
chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
# Run Scalene
poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py
# Run profiler on head branch
# - name: Run profiler on head branch
# - name: Run profiler on base branch
# env:
# HEAD_SHA: ${{ env.HEAD_SHA }}
# BASE_SHA: ${{ env.BASE_SHA }}
# run: |
# echo "Profiling the head branch for code_graph_pipeline.py"
# echo "Checking out head SHA: $HEAD_SHA"
# git checkout $HEAD_SHA
# echo "Profiling the base branch for code_graph_pipeline.py"
# echo "Checking out base SHA: $BASE_SHA"
# git checkout $BASE_SHA
# echo "This is the working directory: $PWD"
# # Ensure the script is executable
# chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
# # Run Scalene
# poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
# poetry run pyinstrument --renderer json -o base_results.json cognee/api/v1/cognify/code_graph_pipeline.py

# Run profiler on head branch
- name: Run profiler on head branch
env:
HEAD_SHA: ${{ env.HEAD_SHA }}
run: |
echo "Profiling the head branch for code_graph_pipeline.py"
echo "Checking out head SHA: $HEAD_SHA"
git checkout $HEAD_SHA
echo "This is the working directory: $PWD"
# Ensure the script is executable
chmod +x cognee/api/v1/cognify/code_graph_pipeline.py
# Run Scalene
poetry run pyinstrument --renderer json -o head_results.json cognee/api/v1/cognify/code_graph_pipeline.py
# # Compare profiling results
# - name: Compare profiling results
Expand Down
6 changes: 5 additions & 1 deletion .github/workflows/reusable_python_example.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ on:
description: "Location of example script to run"
required: true
type: string
arguments:
description: "Arguments for example script"
required: false
type: string
secrets:
GRAPHISTRY_USERNAME:
required: true
Expand Down Expand Up @@ -53,4 +57,4 @@ jobs:
LLM_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
run: poetry run python ${{ inputs.example-location }}
run: poetry run python ${{ inputs.example-location }} ${{ inputs.arguments }}
22 changes: 22 additions & 0 deletions .github/workflows/test_code_graph_example.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
name: test | code graph example

# End-to-end run of the code-graph example via the shared reusable workflow.
on:
  workflow_dispatch:
  pull_request:
    types: [labeled, synchronize]

# Only one run per PR (or ref) at a time; newer pushes cancel older runs.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  run_simple_example_test:
    uses: ./.github/workflows/reusable_python_example.yml
    with:
      example-location: ./examples/python/code_graph_example.py
      arguments: "--repo_path ./evals"
    secrets:
      OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
      GRAPHISTRY_USERNAME: ${{ secrets.GRAPHISTRY_USERNAME }}
      GRAPHISTRY_PASSWORD: ${{ secrets.GRAPHISTRY_PASSWORD }}
32 changes: 32 additions & 0 deletions Dockerfile_modal
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
FROM python:3.11-slim

# --- Runtime configuration --------------------------------------------------
# PIP_NO_CACHE_DIR keeps pip from persisting a wheel cache in image layers.
ENV PIP_NO_CACHE_DIR=true
ENV PATH="${PATH}:/root/.poetry/bin"
ENV PYTHONPATH=/app
ENV RUN_MODE=modal
ENV SKIP_MIGRATIONS=true

# --- System dependencies ----------------------------------------------------
# build-essential/gcc: compile native wheels; libpq-dev: PostgreSQL client
# builds; git/curl: fetch VCS/remote dependencies. The apt lists are removed
# in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    curl \
    gcc \
    git \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*

WORKDIR /app

# Copy only the dependency manifests first so the (slow) poetry install layer
# stays cached until pyproject.toml / poetry.lock actually change.
COPY pyproject.toml poetry.lock /app/

RUN pip install poetry

# --no-root: the application source is copied afterwards, so the project
# itself cannot be installed yet; --without dev keeps test/lint tooling out.
RUN poetry install --all-extras --no-root --without dev

# Application code last: source edits do not invalidate the dependency layers.
COPY cognee/ /app/cognee
COPY README.md /app/README.md
22 changes: 22 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,6 +241,28 @@ Please see the cognee [Development Guide](https://docs.cognee.ai/quickstart/) fo
```bash
pip install cognee
```
### Deployment at Scale (Modal)

Scale cognee in 4(+1) simple steps to handle enterprise workloads using [Modal](https://modal.com)'s GPU-powered infrastructure.

**1. Install the modal python client**
```bash
pip install modal
```
**2. Create a free account on [Modal](https://modal.com)**

**3. Set Up Modal API Key**
```bash
modal token set --token-id TOKEN_ID --token-secret TOKEN_SECRET --profile=PROFILE
modal profile activate PROFILE
```
**4. Run cognee example**

This simple example will deploy separate cognee instances, each building its own memory store and answering a list of questions at scale.
```bash
modal run -d modal_deployment.py
```
**5. Change the `modal_deployment.py` script and develop your own AI memory at scale 🚀**

## 💫 Contributors

Expand Down
1 change: 1 addition & 0 deletions cognee-mcp/src/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from .server import mcp


def main():
"""Main entry point for the package."""
mcp.run(transport="stdio")
9 changes: 6 additions & 3 deletions cognee-mcp/src/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@

# Create server parameters for stdio connection
server_params = StdioServerParameters(
command="mcp", # Executable
args=["run", "src/server.py"], # Optional command line arguments
env=None # Optional environment variables
command="mcp", # Executable
args=["run", "src/server.py"], # Optional command line arguments
env=None, # Optional environment variables
)

text = """
Expand All @@ -27,6 +27,7 @@
more accurate classifications or predictions over time.
"""


async def run():
async with stdio_client(server_params) as (read, write):
async with ClientSession(read, write, timedelta(minutes=3)) as session:
Expand All @@ -37,6 +38,8 @@ async def run():

print(f"Cognify result: {toolResult}")


if __name__ == "__main__":
import asyncio

asyncio.run(run())
7 changes: 4 additions & 3 deletions cognee-mcp/src/server.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,15 @@
import os
import cognee
import importlib.util

# from PIL import Image as PILImage
from mcp.server.fastmcp import FastMCP
from cognee.api.v1.search import SearchType
from cognee.shared.data_models import KnowledgeGraph

mcp = FastMCP("cognee", timeout=120000)


@mcp.tool()
async def cognify(text: str, graph_model_file: str = None, graph_model_name: str = None) -> str:
"""Build knowledge graph from the input text"""
Expand All @@ -19,9 +21,9 @@ async def cognify(text: str, graph_model_file: str = None, graph_model_name: str
await cognee.add(text)

try:
await cognee.cognify(graph_model=graph_model)
await cognee.cognify(graph_model=graph_model)
except Exception as e:
raise ValueError(f"Failed to cognify: {str(e)}")
raise ValueError(f"Failed to cognify: {str(e)}")

return "Ingested"

Expand Down Expand Up @@ -57,7 +59,6 @@ async def prune() -> str:
# raise ValueError(f"Failed to create visualization: {str(e)}")



def node_to_string(node):
node_data = ", ".join(
[f'{key}: "{value}"' for key, value in node.items() if key in ["id", "name"]]
Expand Down
1 change: 0 additions & 1 deletion cognee/tasks/repo_processor/get_non_code_files.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from cognee.modules.data.methods.get_dataset_data import get_dataset_data
from cognee.modules.data.methods.get_datasets_by_name import get_datasets_by_name
from cognee.modules.data.models import Data
from cognee.modules.data.operations.write_metadata import write_metadata
from cognee.modules.ingestion.data_types import BinaryData
from cognee.modules.users.methods import get_default_user
from cognee.shared.CodeGraphEntities import Repository
Expand Down
94 changes: 94 additions & 0 deletions modal_deployment.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import modal
import os
import logging
import asyncio
import cognee
import signal

from cognee.api.v1.search import SearchType
from cognee.shared.utils import setup_logging

# Modal application handle; the functions below register against this app.
app = modal.App("cognee-runner")

# Container image: start from the repo's Dockerfile_modal, overlay the local
# dependency manifests, forward ENV/LLM_API_KEY from the deploying shell, then
# install the project dependencies with poetry and add protobuf/h2 on top.
# NOTE(review): os.getenv() returns None when a variable is unset — confirm
# both ENV and LLM_API_KEY are exported before deploying.
image = (
    modal.Image.from_dockerfile(path="Dockerfile_modal", force_build=False)
    .copy_local_file("pyproject.toml", "pyproject.toml")
    .copy_local_file("poetry.lock", "poetry.lock")
    .env({"ENV": os.getenv("ENV"), "LLM_API_KEY": os.getenv("LLM_API_KEY")})
    .poetry_install_from_file(poetry_pyproject_toml="pyproject.toml")
    .pip_install("protobuf", "h2")
)


@app.function(image=image, concurrency_limit=10)
async def entry(text: str, query: str):
    """Build a fresh cognee memory from `text` and answer `query` against it.

    Runs remotely on Modal (at most 10 concurrent containers). Each call
    wipes prior data and system state first, so invocations are independent.
    """
    setup_logging(logging.ERROR)

    # Start from a clean slate: drop previously ingested data and metadata.
    await cognee.prune.prune_data()
    await cognee.prune.prune_system(metadata=True)

    # Ingest the passage, build the knowledge graph, then run the query.
    await cognee.add(text)
    await cognee.cognify()
    answers = await cognee.search(SearchType.GRAPH_COMPLETION, query_text=query)

    first_answer = answers[0] if answers else None
    return {"text": text, "query": query, "answer": first_answer}


@app.local_entrypoint()
async def main():
    """Fan ten independent (text, query) jobs out to Modal and print answers.

    Each job invokes `entry` in its own container, so every question is
    answered against a memory built only from its paired passage.
    """
    # Fixed evaluation set: each item pairs a source passage with a question
    # whose answer is contained in that passage.
    text_queries = [
        {
            "text": "NASA's Artemis program aims to return humans to the Moon by 2026, focusing on sustainable exploration and preparing for future Mars missions.",
            "query": "When does NASA plan to return humans to the Moon under the Artemis program?",
        },
        {
            "text": "According to a 2022 UN report, global food waste amounts to approximately 931 million tons annually, with households contributing 61% of the total.",
            "query": "How much food waste do households contribute annually according to the 2022 UN report?",
        },
        {
            "text": "The 2021 census data revealed that Tokyo's population reached 14 million, reflecting a 2.1% increase compared to the previous census conducted in 2015.",
            "query": "What was Tokyo's population according to the 2021 census data?",
        },
        {
            "text": "A recent study published in the Journal of Nutrition found that consuming 30 grams of almonds daily can lower LDL cholesterol levels by 7% over a 12-week period.",
            "query": "How much can daily almond consumption lower LDL cholesterol according to the study?",
        },
        {
            "text": "Amazon's Prime membership grew to 200 million subscribers in 2023, marking a 10% increase from the previous year, driven by exclusive content and faster delivery options.",
            "query": "How many Prime members did Amazon have in 2023?",
        },
        {
            "text": "A new report by the International Energy Agency states that global renewable energy capacity increased by 295 gigawatts in 2022, primarily driven by solar and wind power expansion.",
            "query": "By how much did global renewable energy capacity increase in 2022 according to the report?",
        },
        {
            "text": "The World Health Organization reported in 2023 that the global life expectancy has risen to 73.4 years, an increase of 5.5 years since the year 2000.",
            "query": "What is the current global life expectancy according to the WHO's 2023 report?",
        },
        {
            "text": "The FIFA World Cup 2022 held in Qatar attracted a record-breaking audience of 5 billion people across various digital and traditional broadcasting platforms.",
            "query": "How many people watched the FIFA World Cup 2022?",
        },
        {
            "text": "The European Space Agency's JUICE mission, launched in 2023, aims to explore Jupiter's icy moons, including Ganymede, Europa, and Callisto, over the next decade.",
            "query": "Which moons is the JUICE mission set to explore?",
        },
        {
            "text": "According to a report by the International Labour Organization, the global unemployment rate in 2023 was estimated at 5.4%, reflecting a slight decrease compared to the previous year.",
            "query": "What was the global unemployment rate in 2023 according to the ILO?",
        },
    ]

    # Launch all jobs concurrently; entry.remote.aio returns an awaitable
    # handle for the remote invocation.
    tasks = [entry.remote.aio(item["text"], item["query"]) for item in text_queries]

    results = await asyncio.gather(*tasks)

    print("\nFinal Results:")

    for result in results:
        print(result)
        print("----")

    # NOTE(review): self-SIGTERM looks like a workaround to force the local
    # entrypoint process to exit once all results are printed — confirm a
    # normal return actually hangs before removing this.
    os.kill(os.getpid(), signal.SIGTERM)
3 changes: 2 additions & 1 deletion poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ httpx = "0.27.0"
bokeh="^3.6.2"
nltk = "3.9.1"
google-generativeai = {version = "^0.8.4", optional = true}
parso = {version = "^0.8.4", optional = true}
jedi = {version = "^0.19.2", optional = true}


[tool.poetry.extras]
Expand All @@ -96,6 +98,7 @@ falkordb = ["falkordb"]
groq = ["groq"]
milvus = ["pymilvus"]
docs = ["unstructured"]
codegraph = ["parso", "jedi"]

[tool.poetry.group.dev.dependencies]
pytest = "^7.4.0"
Expand Down

0 comments on commit 77a7285

Please sign in to comment.