Skip to content

Commit

Permalink
Merge branch 'main' into feat/COG-553-graph-memory-projection
Browse files Browse the repository at this point in the history
  • Loading branch information
hajdul88 authored Nov 13, 2024
2 parents 8e3a991 + 7a72aa4 commit bf4eedd
Show file tree
Hide file tree
Showing 24 changed files with 359 additions and 183 deletions.
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_10.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_11.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down
7 changes: 5 additions & 2 deletions .github/workflows/test_python_3_9.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,11 @@ jobs:
- name: Install dependencies
run: poetry install --no-interaction

- name: Run tests
run: poetry run pytest tests/
- name: Run unit tests
run: poetry run pytest cognee/tests/unit/

- name: Run integration tests
run: poetry run pytest cognee/tests/integration/

- name: Run default basic pipeline
env:
Expand Down

This file was deleted.

This file was deleted.

Binary file not shown.
Binary file not shown.
Empty file.
Binary file not shown.
Binary file not shown.
14 changes: 0 additions & 14 deletions cognee/modules/pipelines/operations/__tests__/get_graph_url.py

This file was deleted.

53 changes: 0 additions & 53 deletions cognee/tasks/chunks/__tests__/chunk_by_paragraph.test.py

This file was deleted.

1 change: 0 additions & 1 deletion cognee/tasks/documents/classify_documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,5 +13,4 @@ def classify_documents(data_documents: list[Data]) -> list[Document]:
EXTENSION_TO_DOCUMENT_CLASS[data_item.extension](id = data_item.id, title=f"{data_item.name}.{data_item.extension}", raw_data_location=data_item.raw_data_location, name=data_item.name)
for data_item in data_documents
]

return documents
11 changes: 11 additions & 0 deletions cognee/tests/integration/run_toy_tasks/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
import os
import shutil

import pytest


@pytest.fixture(autouse=True, scope="session")
def copy_cognee_db_to_target_location():
    """Stage the pre-built test database where cognee expects to find it.

    Session-scoped and autouse, so the database is copied exactly once
    before any integration test in this directory runs.
    """
    target_dir = "cognee/.cognee_system/databases/"
    os.makedirs(target_dir, exist_ok=True)
    # shutil.copy is portable (os.system("cp ...") fails on Windows) and
    # raises on error instead of silently discarding a nonzero exit status.
    shutil.copy(
        "cognee/tests/integration/run_toy_tasks/data/cognee_db",
        os.path.join(target_dir, "cognee_db"),
    )
Binary file not shown.
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import asyncio
from queue import Queue

from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def pipeline(data_queue):
async def queue_consumer():
while not data_queue.is_closed:
Expand All @@ -17,20 +19,25 @@ async def add_one(num):
async def multiply_by_two(num):
yield num * 2

tasks_run = run_tasks([
Task(queue_consumer),
Task(add_one),
Task(multiply_by_two),
])
tasks_run = run_tasks(
[
Task(queue_consumer),
Task(add_one),
Task(multiply_by_two),
],
pipeline_name="test_run_tasks_from_queue",
)

results = [2, 4, 6, 8, 10, 12, 14, 16, 18]
results = [2, 4, 6, 8, 10, 12, 14, 16, 18, 20]
index = 0
async for result in tasks_run:
print(result)
assert result == results[index]
assert (
result == results[index]
), f"at {index = }: {result = } != {results[index] = }"
index += 1

async def main():

async def run_queue():
data_queue = Queue()
data_queue.is_closed = False

Expand All @@ -42,5 +49,6 @@ async def queue_producer():

await asyncio.gather(pipeline(data_queue), queue_producer())

if __name__ == "__main__":
asyncio.run(main())

def test_run_tasks_from_queue():
asyncio.run(run_queue())
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import asyncio

from cognee.modules.pipelines.operations.run_tasks import run_tasks
from cognee.modules.pipelines.tasks.Task import Task


async def main():
async def run_and_check_tasks():
def number_generator(num):
for i in range(num):
yield i + 1
Expand All @@ -18,19 +19,25 @@ async def multiply_by_two(num):
async def add_one_single(num):
yield num + 1

pipeline = run_tasks([
Task(number_generator),
Task(add_one, task_config = {"batch_size": 5}),
Task(multiply_by_two, task_config = {"batch_size": 1}),
Task(add_one_single),
], 10)
pipeline = run_tasks(
[
Task(number_generator),
Task(add_one, task_config={"batch_size": 5}),
Task(multiply_by_two, task_config={"batch_size": 1}),
Task(add_one_single),
],
10,
pipeline_name="test_run_tasks",
)

results = [5, 7, 9, 11, 13, 15, 17, 19, 21, 23]
index = 0
async for result in pipeline:
print(result)
assert result == results[index]
assert (
result == results[index]
), f"at {index = }: {result = } != {results[index] = }"
index += 1

if __name__ == "__main__":
asyncio.run(main())

def test_run_tasks():
asyncio.run(run_and_check_tasks())
34 changes: 34 additions & 0 deletions cognee/tests/unit/documents/PdfDocument_test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
import os
import uuid

from cognee.modules.data.processing.document_types.PdfDocument import PdfDocument

# Expected per-paragraph statistics for the bundled test PDF, read with
# chunk_size=1024. One dict per paragraph, in read order.
GROUND_TRUTH = [
    {"word_count": 879, "len_text": 5622, "cut_type": "sentence_end"},
    {"word_count": 951, "len_text": 6384, "cut_type": "sentence_end"},
]


def test_PdfDocument():
    """Check PdfDocument.read() paragraph chunking against known stats.

    Verifies word count, text length, and cut type of every paragraph,
    and that read() produces at least as many paragraphs as GROUND_TRUTH.
    """
    # Build an absolute path to tests/test_data/artificial-intelligence.pdf
    # (two directories above this file, then into test_data).
    test_file_path = os.path.join(
        os.sep,
        *(os.path.dirname(__file__).split(os.sep)[:-2]),
        "test_data",
        "artificial-intelligence.pdf",
    )
    pdf_doc = PdfDocument(
        id=uuid.uuid4(), name="Test document.pdf", raw_data_location=test_file_path
    )

    checked = 0
    for ground_truth, paragraph_data in zip(
        GROUND_TRUTH, pdf_doc.read(chunk_size=1024)
    ):
        assert (
            ground_truth["word_count"] == paragraph_data.word_count
        ), f'{ground_truth["word_count"] = } != {paragraph_data.word_count = }'
        assert ground_truth["len_text"] == len(
            paragraph_data.text
        ), f'{ground_truth["len_text"] = } != {len(paragraph_data.text) = }'
        assert (
            ground_truth["cut_type"] == paragraph_data.cut_type
        ), f'{ground_truth["cut_type"] = } != {paragraph_data.cut_type = }'
        checked += 1

    # zip() stops at the shorter iterable, so without this check the test
    # would silently pass if read() yielded fewer paragraphs than expected.
    assert checked == len(
        GROUND_TRUTH
    ), f"read() produced only {checked} of {len(GROUND_TRUTH)} expected paragraphs"
Loading

0 comments on commit bf4eedd

Please sign in to comment.