diff --git a/.gitignore b/.gitignore index fa6f90f1..fab1e831 100644 --- a/.gitignore +++ b/.gitignore @@ -139,6 +139,7 @@ venv/ ENV/ env.bak/ venv.bak/ +pytest.env # Spyder project settings .spyderproject @@ -186,4 +187,4 @@ docker.env # ignore docs dynamically generated at build time. /docs/api -/docs/tests \ No newline at end of file +/docs/tests diff --git a/docs/developer-guide/getting-started.md b/docs/developer-guide/getting-started.md index 8f1e9a32..5da6e696 100644 --- a/docs/developer-guide/getting-started.md +++ b/docs/developer-guide/getting-started.md @@ -57,9 +57,24 @@ Some useful examples of how this project can be used: ``` - Run tests + + - Unit tests ```sh poe test + ``` + + - Integration tests + + - Prepare env vars by supplying a pytest.env file required for the integration tests + + ```sh + cp misc/pytest.env.template pytest.env + ## Edit pytest.env with the correct values for the test environment, then run the tests + ``` + + ```sh + poe test-integration ``` - Run the project diff --git a/misc/pytest.env.template b/misc/pytest.env.template new file mode 100644 index 00000000..63957e24 --- /dev/null +++ b/misc/pytest.env.template @@ -0,0 +1,8 @@ +# Copy this file to the root of the project as pytest.env +# to set up environment variables for integration testing +DOCQ_DATA=./.persisted-test-integration/ +DOCQ_OPENAI_API_KEY=YOUR_OPEN_AI_KEY +DOCQ_COOKIE_HMAC_SECRET_KEY=32_char_secret_used_to_encrypt + +# This will use the DOCQ_OPENAI_API_KEY for now. +OPENAI_API_KEY=${DOCQ_OPENAI_API_KEY} diff --git a/misc/test_files/integration_test.pdf b/misc/test_files/integration_test.pdf new file mode 100644 index 00000000..76e952b3 Binary files /dev/null and b/misc/test_files/integration_test.pdf differ diff --git a/poetry.lock b/poetry.lock index 259420c9..d529fc6a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.6.1 and should not be changed by hand. 
+# This file is automatically @generated by Poetry 1.5.1 and should not be changed by hand. [[package]] name = "accelerate" @@ -261,16 +261,6 @@ files = [ {file = "async_timeout-4.0.3-py3-none-any.whl", hash = "sha256:7405140ff1230c310e51dc27b3145b9092d659ce68ff733fb0cefe3ee42be028"}, ] -[[package]] -name = "atomicwrites" -version = "1.4.1" -description = "Atomic file writes." -optional = false -python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*" -files = [ - {file = "atomicwrites-1.4.1.tar.gz", hash = "sha256:81b2c9071a49367a7f770170e5eec8cb66567cfbbc8c73d20ce5ca4a8d71cf11"}, -] - [[package]] name = "attrs" version = "23.1.0" @@ -2244,8 +2234,8 @@ files = [ [package.dependencies] numpy = [ + {version = ">=1.21.0", markers = "python_version >= \"3.10\""}, {version = ">=1.23.2", markers = "python_version >= \"3.11\""}, - {version = ">=1.21.0", markers = "python_version >= \"3.10\" and python_version < \"3.11\""}, ] python-dateutil = ">=2.8.2" pytz = ">=2020.1" @@ -2765,27 +2755,25 @@ files = [ [[package]] name = "pytest" -version = "7.1.1" +version = "7.3.1" description = "pytest: simple powerful testing with Python" optional = false python-versions = ">=3.7" files = [ - {file = "pytest-7.1.1-py3-none-any.whl", hash = "sha256:92f723789a8fdd7180b6b06483874feca4c48a5c76968e03bb3e7f806a1869ea"}, - {file = "pytest-7.1.1.tar.gz", hash = "sha256:841132caef6b1ad17a9afde46dc4f6cfa59a05f9555aae5151f73bdf2820ca63"}, + {file = "pytest-7.3.1-py3-none-any.whl", hash = "sha256:3799fa815351fea3a5e96ac7e503a96fa51cc9942c3753cda7651b93c1cfa362"}, + {file = "pytest-7.3.1.tar.gz", hash = "sha256:434afafd78b1d78ed0addf160ad2b77a30d35d4bdf8af234fe621919d9ed15e3"}, ] [package.dependencies] -atomicwrites = {version = ">=1.0", markers = "sys_platform == \"win32\""} -attrs = ">=19.2.0" colorama = {version = "*", markers = "sys_platform == \"win32\""} +exceptiongroup = {version = ">=1.0.0rc8", markers = "python_version < \"3.11\""} iniconfig = "*" packaging = "*" pluggy = 
">=0.12,<2.0" -py = ">=1.8.2" -tomli = ">=1.0.0" +tomli = {version = ">=1.0.0", markers = "python_version < \"3.11\""} [package.extras] -testing = ["argcomplete", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] +testing = ["argcomplete", "attrs (>=19.2.0)", "hypothesis (>=3.56)", "mock", "nose", "pygments (>=2.7.2)", "requests", "xmlschema"] [[package]] name = "pytest-bdd" @@ -2903,6 +2891,20 @@ files = [ [package.dependencies] six = ">=1.5" +[[package]] +name = "python-dotenv" +version = "1.0.0" +description = "Read key-value pairs from a .env file and set them as environment variables" +optional = false +python-versions = ">=3.8" +files = [ + {file = "python-dotenv-1.0.0.tar.gz", hash = "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba"}, + {file = "python_dotenv-1.0.0-py3-none-any.whl", hash = "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a"}, +] + +[package.extras] +cli = ["click (>=5.0)"] + [[package]] name = "pytz" version = "2023.3" @@ -3541,7 +3543,7 @@ files = [ ] [package.dependencies] -greenlet = {version = "!=0.4.17", markers = "platform_machine == \"aarch64\" or platform_machine == \"ppc64le\" or platform_machine == \"x86_64\" or platform_machine == \"amd64\" or platform_machine == \"AMD64\" or platform_machine == \"win32\" or platform_machine == \"WIN32\""} +greenlet = {version = "!=0.4.17", markers = "platform_machine == \"win32\" or platform_machine == \"WIN32\" or platform_machine == \"AMD64\" or platform_machine == \"amd64\" or platform_machine == \"x86_64\" or platform_machine == \"ppc64le\" or platform_machine == \"aarch64\""} typing-extensions = ">=4.2.0" [package.extras] @@ -4429,4 +4431,4 @@ testing = ["big-O", "flake8 (<5)", "jaraco.functools", "jaraco.itertools", "more [metadata] lock-version = "2.0" python-versions = "^3.10" -content-hash = "e08c1e97dbfb36153ffc9faad15009c7986ff17a09314dece401fe3ebe5d01c4" +content-hash = 
"6b1c7d8d8b21cb4485db026acc601b5991149e116e43bc9275231455c8fbd70c" diff --git a/pyproject.toml b/pyproject.toml index 5b615f44..92b3b9fc 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -31,7 +31,7 @@ black = { version = "^22.3.0", allow-prereleases = true } xdoctest = "^1.0.0" poethepoet = "^0.16.4" ruff = "^0.0.253" -pytest = "7.1.1" +pytest = "7.3.1" pytest-html = "^3.1.1" pytest-sugar = "^0.9.6" pytest-reverse = "^1.5.0" @@ -44,6 +44,7 @@ mkdocs-awesome-pages-plugin = "^2.8.0" mkapi = "^1.0.14" pillow = "^9.5.0" cairosvg = "^2.7.0" +python-dotenv = "^1.0.0" [tool.poetry.scripts] -c = "poethepoet:main" @@ -123,7 +124,8 @@ exclude_lines = [ [tool.poe.tasks] install = "poetry install --only main" install-dev = "poetry install" -test = "pytest" +test = "pytest tests/unit" +test-integration = "pytest tests/integration" pre-commit = "pre-commit run --all-files" lint-ruff = "ruff check **/*.py --fix" lint-black = "black **/*.py" diff --git a/source/docq/manage_organisations.py b/source/docq/manage_organisations.py index 462f28e6..13713043 100644 --- a/source/docq/manage_organisations.py +++ b/source/docq/manage_organisations.py @@ -6,8 +6,7 @@ from datetime import datetime from typing import List, Tuple -from . import manage_settings -from .manage_users import _add_organisation_member_sql +from . 
import manage_settings, manage_users from .support.store import get_sqlite_system_file SQL_CREATE_ORGS_TABLE = """ @@ -146,7 +145,7 @@ def create_organisation(name: str, creating_user_id: int) -> int | None: _create_organisation_sql(cursor, name) org_id = cursor.lastrowid is_default_org_admin = True - _add_organisation_member_sql(cursor, org_id, creating_user_id, is_default_org_admin) + manage_users._add_organisation_member_sql(cursor, org_id, creating_user_id, is_default_org_admin) connection.commit() log.info("Created organization %s with member %s", org_id, creating_user_id) except Exception as e: diff --git a/source/docq/manage_spaces.py b/source/docq/manage_spaces.py index e3f11189..5a1d39e1 100644 --- a/source/docq/manage_spaces.py +++ b/source/docq/manage_spaces.py @@ -70,11 +70,9 @@ def _persist_index(index: GPTVectorStoreIndex, space: SpaceKey) -> None: def reindex(space: SpaceKey) -> None: """Reindex documents in a space.""" - (ds_type, ds_configs) = get_space_data_source(space) - - saved_model_settings = get_saved_model_settings_collection(space.org_id) - try: + (ds_type, ds_configs) = get_space_data_source(space) + saved_model_settings = get_saved_model_settings_collection(space.org_id) log.debug("get datasource instance") documents = SpaceDataSources[ds_type].value.load(space, ds_configs) log.debug("docs to index, %s", len(documents)) diff --git a/source/docq/manage_users.py b/source/docq/manage_users.py index e6ef1e89..dc5fc125 100644 --- a/source/docq/manage_users.py +++ b/source/docq/manage_users.py @@ -356,11 +356,11 @@ def rnd(): personal_org_id = cursor.lastrowid - manage_organisations._add_organisation_member_sql(cursor, personal_org_id, user_id, True) + _add_organisation_member_sql(cursor, personal_org_id, user_id, True) if org_id: log.info("Adding user %s to org %s", user_id, org_id) - manage_organisations._add_organisation_member_sql(cursor, org_id, user_id, org_admin) + _add_organisation_member_sql(cursor, org_id, user_id, org_admin) 
connection.commit() log.info("Created user %s", user_id) diff --git a/tests/integration/__init__.py b/tests/integration/__init__.py new file mode 100644 index 00000000..e9f074c3 --- /dev/null +++ b/tests/integration/__init__.py @@ -0,0 +1,4 @@ +"""Integration tests.""" +from dotenv import load_dotenv + +load_dotenv("pytest.env") diff --git a/tests/integration/backend_integration_test.py b/tests/integration/backend_integration_test.py new file mode 100644 index 00000000..f64be58d --- /dev/null +++ b/tests/integration/backend_integration_test.py @@ -0,0 +1,172 @@ +"""Docq backend integration tests.""" + +import os +from contextlib import suppress +from shutil import rmtree + +import pytest +from docq import config, domain, manage_documents, manage_settings, manage_users, run_queries, setup +from docq.model_selection.main import ( + ModelUsageSettingsCollection, + get_saved_model_settings_collection, +) + +from tests.utilities import ( + DOCQ_DATA_KEY, + TEST_FILE_NAME, + get_auth_results, + get_features, + get_sample_file, + get_user, +) +from web.utils.constants import SessionKeyNameForAuth + + +@pytest.fixture(scope="session" , autouse=True) +def _setup_and_teardown() -> None: + """Setup and teardown for each test.""" + print("Setup") + # setup_env() + setup.init() + + yield + print("Teardown") + with suppress(FileNotFoundError): + rmtree(os.environ[DOCQ_DATA_KEY]) + + +@pytest.fixture() +def test_user() -> dict: + """Get a test user.""" + return get_user() + +## User actions + +# Create user +@pytest.fixture(autouse=True) +def _create_test_user(test_user: dict) -> None: + """Create a test user.""" + with suppress(ValueError): + manage_users.create_user(**test_user) + + +# Login +@pytest.fixture() +def auth_results(test_user: dict) -> dict: + """Authenticate the test user.""" + return get_auth_results(test_user) + + +@pytest.fixture() +def features(auth_results: dict) -> dict[str, domain.FeatureKey]: + """Get available features.""" + return 
get_features(auth_results[SessionKeyNameForAuth.ID.name]) + + +@pytest.fixture() +def personal_space(auth_results: dict) -> domain.SpaceKey: + """Get personal space.""" + return domain.SpaceKey( + domain.SpaceType.PERSONAL, + auth_results[SessionKeyNameForAuth.ID.name], + auth_results[SessionKeyNameForAuth.SELECTED_ORG_ID.name] + ) + + +@pytest.fixture() +def sample_file() -> bytes: + """Get test file.""" + return get_sample_file() + + +# Upload file to a personal space +@pytest.fixture(autouse=True) +def _upload_test_file(sample_file: bytes, personal_space: domain.SpaceKey) -> None: + """Upload a test file.""" + manage_documents.upload(TEST_FILE_NAME, sample_file, personal_space) + + +# Update organisation settings +@pytest.fixture(autouse=True) +def _update_organisation_settings(auth_results: dict) -> None: + """Update organisation settings.""" + manage_settings.update_organisation_settings( + { + config.SystemSettingsKey.ENABLED_FEATURES.name: [ + f.name for f in config.FeatureType + ], + config.SystemSettingsKey.MODEL_COLLECTION.name: "openai_latest", + }, + org_id=auth_results[SessionKeyNameForAuth.SELECTED_ORG_ID.name], + ) + + +# Get saved model settings +@pytest.fixture() +def saved_model_settings(auth_results: dict) -> ModelUsageSettingsCollection: + """Get saved model settings.""" + return get_saved_model_settings_collection( + auth_results[SessionKeyNameForAuth.SELECTED_ORG_ID.name] + ) + + +# Run tests +def test_user_exists(test_user: dict) -> None: + """Test that user exists.""" + assert manage_users.authenticate(**test_user) is not None, "The test user should exist." + + +def test_the_sample_file_exists(personal_space: domain.SpaceKey) -> None: + """Test that the test file exists.""" + file = manage_documents.get_file(TEST_FILE_NAME, personal_space) + assert os.path.isfile(file), "The test file should exist." + assert file.endswith(TEST_FILE_NAME), "The test file should have the correct name." 
+ + +def test_chat_private_feature(features: domain.FeatureKey, saved_model_settings: ModelUsageSettingsCollection) -> None: + """Run a query against the private chat feature.""" + prompt = """ + You are an AI designed to help humans with their daily activities. + You are currently in a test enviroment to gauge whether this functionality works as expected. + For this test, all you need to do is to echo back the input and append from docq at the end of it. + + Below is a sample expected input (SampleInput) and output (SampleOutput). + SampleInput: Hello World + SampleOutput: Hello World from docq + + Now, let's get started with the following input. + INPUT: {input} + """ + thread_id = 0 + + # Run the query + results = run_queries.query( + prompt.format(input="Test 1"), + features[config.FeatureType.CHAT_PRIVATE.name], + thread_id, + model_settings_collection=saved_model_settings, + ) + assert "Test 1 from docq" in results[1][1], "The query should return the expected response." + + +def test_ask_personal_docs_feature( + features: domain.FeatureKey, + personal_space: domain.SpaceKey, + saved_model_settings: ModelUsageSettingsCollection + ) -> None: + """Run a query against the personal ask feature.""" + prompt = """ + What is the official docq website? + """ + thread_id = 0 + + # Run the query + results = run_queries.query( + prompt, + features[config.FeatureType.ASK_PERSONAL.name], + thread_id, + model_settings_collection=saved_model_settings, + space=personal_space, + ) + ai_response: str = results[1][1] + assert "https://docq.ai" in ai_response, "The query should return the expected response." 
diff --git a/tests/integration/fixtures/test_user.json b/tests/integration/fixtures/test_user.json new file mode 100644 index 00000000..488870e7 --- /dev/null +++ b/tests/integration/fixtures/test_user.json @@ -0,0 +1,4 @@ +{ + "username": "testuser", + "password": "testpass" +} \ No newline at end of file diff --git a/tests/unit/__init__.py b/tests/unit/__init__.py new file mode 100644 index 00000000..e0310a01 --- /dev/null +++ b/tests/unit/__init__.py @@ -0,0 +1 @@ +"""Unit tests.""" diff --git a/tests/docq/data_source/document_list_item_test.py b/tests/unit/docq/data_source/document_list_item_test.py similarity index 89% rename from tests/docq/data_source/document_list_item_test.py rename to tests/unit/docq/data_source/document_list_item_test.py index c69db16f..55d3a2db 100644 --- a/tests/docq/data_source/document_list_item_test.py +++ b/tests/unit/docq/data_source/document_list_item_test.py @@ -1,9 +1,10 @@ """A module for testing the DocumentList data structure serialisation.""" -from datetime import datetime import sys import tempfile import unittest +from datetime import datetime +from typing import Self from docq.data_source.azure_blob import AzureBlob from docq.domain import DocumentListItem @@ -12,7 +13,8 @@ class TestDocumentListSerialisation(unittest.TestCase): """A unittest class for testing the DocumentList data structure.""" - def setUp(self) -> None: + def setUp(self: Self) -> None: + """Set up the test fixture.""" self.document_list = [ DocumentListItem(link="document1.pdf", indexed_on=1691416038.209924, size=1024234), DocumentListItem(link="document2.pdf", indexed_on=1691416038.209924, size=2048234), @@ -20,7 +22,7 @@ def setUp(self) -> None: self.persist_path = tempfile.mkdtemp() self.filename = "document_list.json" - def test_save_load_document_list(self): + def test_save_load_document_list(self: Self) -> None: """Test that the serialisation format in _save_document_list can be de-serialised by _load_document_list.""" with 
tempfile.TemporaryDirectory() as persist_path: AzureBlob()._save_document_list( @@ -29,7 +31,7 @@ def test_save_load_document_list(self): loaded_document_list = AzureBlob()._load_document_list(persist_path=persist_path, filename=self.filename) assert loaded_document_list == self.document_list - def test_create_instance_method(self): + def test_create_instance_method(self: Self) -> None: """Test that the create_instance method works as expected.""" document_link = "document1.pdf" document_text = "This is the text of the document." diff --git a/tests/docq/data_source/manual_upload_test.py b/tests/unit/docq/data_source/manual_upload_test.py similarity index 94% rename from tests/docq/data_source/manual_upload_test.py rename to tests/unit/docq/data_source/manual_upload_test.py index 7336a3ee..a93ae7f2 100644 --- a/tests/docq/data_source/manual_upload_test.py +++ b/tests/unit/docq/data_source/manual_upload_test.py @@ -2,6 +2,7 @@ import unittest from datetime import datetime +from typing import Self from unittest.mock import MagicMock, patch from docq.config import SpaceType @@ -11,11 +12,11 @@ class TestManualUpload(unittest.TestCase): # noqa: D101 - def setUp(self) -> None: + def setUp(self: Self) -> None: """Set up the test.""" self.manual_upload = ManualUpload() - def test_get_document_list(self) -> None: + def test_get_document_list(self: Self) -> None: """Test the get_document_list method.""" space = SpaceKey(SpaceType.PERSONAL, 123, 345) configs = {} @@ -44,7 +45,7 @@ def test_get_document_list(self) -> None: ) ] - def test_load(self) -> None: + def test_load(self: Self) -> None: """Test the load method including the metadata fields.""" space = SpaceKey(SpaceType.PERSONAL, 123, 345) configs = {} @@ -54,7 +55,7 @@ def test_load(self) -> None: documents = self.manual_upload.load(space, configs) - assert len(documents) == 10 + assert len(documents) == 11 assert documents[0].metadata[str(DocumentMetadata.SPACE_ID.name).lower()] == 123 assert 
documents[0].metadata[str(DocumentMetadata.SPACE_TYPE.name).lower()] == "PERSONAL" assert documents[0].metadata[str(DocumentMetadata.DATA_SOURCE_NAME.name).lower()] == "Manual Upload" diff --git a/tests/docq/data_source/support/web_extracting_readthedocs_test.py b/tests/unit/docq/data_source/support/web_extracting_readthedocs_test.py similarity index 85% rename from tests/docq/data_source/support/web_extracting_readthedocs_test.py rename to tests/unit/docq/data_source/support/web_extracting_readthedocs_test.py index dbfb7344..a8c6c644 100644 --- a/tests/docq/data_source/support/web_extracting_readthedocs_test.py +++ b/tests/unit/docq/data_source/support/web_extracting_readthedocs_test.py @@ -1,28 +1,29 @@ """Tests for the ReadTheDocsTextExtractor class.""" import unittest +from typing import Self from bs4 import BeautifulSoup from docq.data_source.support.web_extracting import ReadTheDocsTextExtractor class TestReadTheDocsTextExtractor(unittest.TestCase): # noqa: D101 - def setUp(self) -> None: + def setUp(self: Self) -> None: """Set up the test by creating a new instance of ReadTheDocsTextExtractor.""" self.extractor: ReadTheDocsTextExtractor = ReadTheDocsTextExtractor() - def test_extract_text(self) -> None: + def test_extract_text(self: Self) -> None: """Test that extract_text returns the expected text.""" soup = BeautifulSoup("