diff --git a/Dockerfile b/Dockerfile index 52198a69f..6d319d31f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -54,4 +54,6 @@ RUN groupadd -g 998 annif_user && \ chown -R annif_user:annif_user /annif-projects /Annif/tests/data USER annif_user +ENV GUNICORN_CMD_ARGS="--worker-class uvicorn.workers.UvicornWorker" + CMD annif diff --git a/annif/__init__.py b/annif/__init__.py index a71b9f379..120c7d4cf 100644 --- a/annif/__init__.py +++ b/annif/__init__.py @@ -7,6 +7,8 @@ import os.path from typing import TYPE_CHECKING +from flask import Flask + logging.basicConfig() logger = logging.getLogger("annif") logger.setLevel(level=logging.INFO) @@ -14,12 +16,11 @@ import annif.backend # noqa if TYPE_CHECKING: - from flask.app import Flask + from connexion.apps.flask import FlaskApp def create_flask_app(config_name: str | None = None) -> Flask: """Create a Flask app to be used by the CLI.""" - from flask import Flask _set_tensorflow_loglevel() @@ -31,29 +32,41 @@ def create_flask_app(config_name: str | None = None) -> Flask: return app -def create_app(config_name: str | None = None) -> Flask: +def create_cx_app(config_name: str | None = None) -> FlaskApp: """Create a Connexion app to be used for the API.""" - # 'cxapp' here is the Connexion application that has a normal Flask app - # as a property (cxapp.app) import connexion - from flask_cors import CORS + from connexion.datastructures import MediaTypeDict + from connexion.middleware import MiddlewarePosition + from connexion.validators import FormDataValidator, MultiPartFormDataValidator + from starlette.middleware.cors import CORSMiddleware + import annif.registry from annif.openapi.validation import CustomRequestBodyValidator specdir = os.path.join(os.path.dirname(__file__), "openapi") - cxapp = connexion.App(__name__, specification_dir=specdir) + cxapp = connexion.FlaskApp(__name__, specification_dir=specdir) config_name = _get_config_name(config_name) logger.debug(f"creating connexion app with configuration {config_name}") cxapp.app.config.from_object(config_name) cxapp.app.config.from_envvar("ANNIF_SETTINGS", silent=True) validator_map = { - "body": CustomRequestBodyValidator, + "body": MediaTypeDict( + { + "*/*json": CustomRequestBodyValidator, + "application/x-www-form-urlencoded": FormDataValidator, + "multipart/form-data": MultiPartFormDataValidator, + } + ), } cxapp.add_api("annif.yaml", validator_map=validator_map) # add CORS support - CORS(cxapp.app) + cxapp.add_middleware( + CORSMiddleware, + position=MiddlewarePosition.BEFORE_EXCEPTION, + allow_origins=["*"], + ) if cxapp.app.config["INITIALIZE_PROJECTS"]: annif.registry.initialize_projects(cxapp.app) @@ -64,8 +77,11 @@ def create_app(config_name: str | None = None) -> Flask: cxapp.app.register_blueprint(bp) - # return the Flask app - return cxapp.app + # return the Connexion app + return cxapp + + +create_app = create_cx_app # Alias to allow starting directly with uvicorn run def _get_config_name(config_name: str | None) -> str: diff --git a/annif/cli.py b/annif/cli.py index d8ca1ea56..63a93c795 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -25,15 +25,12 @@ logger = annif.logger click_log.basic_config(logger) - -if len(sys.argv) > 1 and sys.argv[1] in ("run", "routes"): - create_app = annif.create_app # Use Flask with Connexion -else: - # Connexion is not needed for most CLI commands, use plain Flask - create_app = annif.create_flask_app - -cli = FlaskGroup(create_app=create_app, add_version_option=False) +create_app = annif.create_flask_app +cli = FlaskGroup( + create_app=create_app, add_default_commands=False, add_version_option=False +) cli = click.version_option(message="%(version)s")(cli) +cli.params = [opt for opt in cli.params if opt.name not in ("env_file", "app")] @cli.command("list-projects") @@ -442,6 +439,21 @@ def run_eval( ) +@cli.command("run") +@click.option("--port", type=int, default=5000) +@click.option("--log-level") +@click_log.simple_verbosity_option(logger, default="ERROR") +def run_app(**kwargs): + """ + Run Annif in server mode for development. + \f + The server is for development purposes only. + """ + kwargs = {k: v for k, v in kwargs.items() if v is not None} + cxapp = annif.create_cx_app() + cxapp.run(**kwargs) + + FILTER_BATCH_MAX_LIMIT = 15 OPTIMIZE_METRICS = ["Precision (doc avg)", "Recall (doc avg)", "F1 score (doc avg)"] diff --git a/annif/openapi/annif.yaml b/annif/openapi/annif.yaml index c5143313d..74e8a4661 100644 --- a/annif/openapi/annif.yaml +++ b/annif/openapi/annif.yaml @@ -174,7 +174,9 @@ paths: responses: "204": description: successful operation - content: {} + content: + application/json: + {} "404": $ref: '#/components/responses/NotFound' "503": diff --git a/annif/openapi/validation.py b/annif/openapi/validation.py index e57d6830d..2fce37732 100644 --- a/annif/openapi/validation.py +++ b/annif/openapi/validation.py @@ -3,48 +3,37 @@ from __future__ import annotations import logging +from typing import Any -import jsonschema -from connexion import decorators from connexion.exceptions import BadRequestProblem -from connexion.utils import is_null +from connexion.json_schema import format_error_with_path +from connexion.validators import JSONRequestBodyValidator +from jsonschema.exceptions import ValidationError logger = logging.getLogger("openapi.validation") -class CustomRequestBodyValidator(decorators.validation.RequestBodyValidator): +class CustomRequestBodyValidator(JSONRequestBodyValidator): """Custom request body validator that overrides the default error message for the - 'maxItems' validator for the 'documents' property.""" + 'maxItems' validator for the 'documents' property to prevent logging request body + with the contents of all documents.""" def __init__(self, *args, **kwargs) -> None: super().__init__(*args, **kwargs) - def validate_schema( - self, - data: list | dict, - url: str, - ) -> None: - """Validate the request body against the schema.""" - - if self.is_null_value_valid and is_null(data): - return None # pragma: no cover - + def _validate(self, body: Any) -> dict | None: try: - self.validator.validate(data) - except jsonschema.ValidationError as exception: + return self._validator.validate(body) + except ValidationError as exception: if exception.validator == "maxItems" and list(exception.schema_path) == [ "properties", "documents", "maxItems", ]: exception.message = "too many items" - - error_path_msg = self._error_path_message(exception=exception) + error_path_msg = format_error_with_path(exception=exception) logger.error( - "{url} validation error: {error}{error_path_msg}".format( - url=url, error=exception.message, error_path_msg=error_path_msg - ), + f"Validation error: {exception.message}{error_path_msg}", extra={"validator": "body"}, ) raise BadRequestProblem(detail=f"{exception.message}{error_path_msg}") - return None diff --git a/annif/rest.py b/annif/rest.py index f8d9f6d7e..c7f457687 100644 --- a/annif/rest.py +++ b/annif/rest.py @@ -14,8 +14,6 @@ from annif.project import Access if TYPE_CHECKING: - from datetime import datetime - from connexion.lifecycle import ConnexionResponse from annif.corpus.subject import SubjectIndex @@ -43,10 +41,11 @@ def server_error( ) -def show_info() -> dict[str, str]: +def show_info() -> tuple: """return version of annif and a title for the api according to OpenAPI spec""" - return {"title": "Annif REST API", "version": importlib.metadata.version("annif")} + result = {"title": "Annif REST API", "version": importlib.metadata.version("annif")} + return result, 200, {"Content-Type": "application/json"} def language_not_supported_error(lang: str) -> ConnexionResponse: @@ -59,15 +58,16 @@ def language_not_supported_error(lang: str) -> ConnexionResponse: ) -def list_projects() -> dict[str, list[dict[str, str | dict | bool | datetime | None]]]: +def list_projects() -> tuple: """return a dict with projects formatted according to OpenAPI spec""" - return { + result = { "projects": [ proj.dump() for proj in annif.registry.get_projects(min_access=Access.public).values() ] } + return result, 200, {"Content-Type": "application/json"} def show_project( @@ -79,7 +79,7 @@ def show_project( project = annif.registry.get_project(project_id, min_access=Access.hidden) except ValueError: return project_not_found_error(project_id) - return project.dump() + return project.dump(), 200, {"Content-Type": "application/json"} def _suggestion_to_dict( @@ -124,7 +124,7 @@ def suggest( if _is_error(result): return result - return result[0] + return result[0], 200, {"Content-Type": "application/json"} def suggest_batch( @@ -142,7 +142,7 @@ def suggest_batch( return result for document_results, document in zip(result, documents): document_results["document_id"] = document.get("document_id") - return result + return result, 200, {"Content-Type": "application/json"} def _suggest( @@ -214,4 +214,4 @@ def learn( except AnnifException as err: return server_error(err) - return None, 204 + return None, 204, {"Content-Type": "application/json"} diff --git a/annif/templates/home.html b/annif/templates/home.html index 4a0202aa6..a7e6f2c58 100644 --- a/annif/templates/home.html +++ b/annif/templates/home.html @@ -306,7 +306,7 @@

Suggested subjects

\ return; } var this_ = this; - var formData = new FormData(); + var formData = new URLSearchParams(); formData.append('text', this_.text); formData.append('limit', this_.limit); this_.loading = true; diff --git a/docs/source/commands.rst b/docs/source/commands.rst index 849f6aadf..a76ea8b7c 100644 --- a/docs/source/commands.rst +++ b/docs/source/commands.rst @@ -121,7 +121,7 @@ Subject index administration N/A -.. click:: flask.cli:run_command +.. click:: annif.cli:run_app :prog: annif run **REST equivalent** diff --git a/pyproject.toml b/pyproject.toml index ca2a30d1a..be4d87726 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -30,9 +30,7 @@ classifiers = [ [tool.poetry.dependencies] python = ">=3.9,<3.12" -connexion = { version = "2.14.2", extras = ["swagger-ui"] } -flask = "2.2.*" -flask-cors = "4.0.*" +connexion = { version = "~3.0.5", extras = ["flask", "uvicorn", "swagger-ui"] } click = "8.1.*" click-log = "0.4.*" joblib = "1.3.*" diff --git a/tests/conftest.py b/tests/conftest.py index 75247262d..7d7a851ee 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -15,34 +15,39 @@ @pytest.fixture(scope="module") -def app(): +def cxapp(): # make sure the dummy vocab is in place because many tests depend on it subjfile = os.path.join(os.path.dirname(__file__), "corpora", "dummy-subjects.csv") - app = annif.create_app(config_name="annif.default_config.TestingConfig") - with app.app_context(): + cxapp = annif.create_app(config_name="annif.default_config.TestingConfig") + with cxapp.app.app_context(): project = annif.registry.get_project("dummy-en") # the vocab is needed for both English and Finnish language projects vocab = annif.corpus.SubjectFileCSV(subjfile) project.vocab.load_vocabulary(vocab) - return app + return cxapp + + +@pytest.fixture(scope="module") +def app(cxapp): + return cxapp.app @pytest.fixture(scope="module") def app_with_initialize(): - app = annif.create_app(config_name="annif.default_config.TestingInitializeConfig") - return app + cxapp = annif.create_app(config_name="annif.default_config.TestingInitializeConfig") + return cxapp.app @pytest.fixture(scope="module") @unittest.mock.patch.dict(os.environ, {"ANNIF_PROJECTS_INIT": ".*-fi"}) def app_with_initialize_fi_projects(): - app = annif.create_app(config_name="annif.default_config.TestingInitializeConfig") - return app + cxapp = annif.create_app(config_name="annif.default_config.TestingInitializeConfig") + return cxapp.app @pytest.fixture -def app_client(app): - with app.test_client() as app_client: +def app_client(cxapp): + with cxapp.test_client() as app_client: yield app_client diff --git a/tests/test_cli.py b/tests/test_cli.py index 77adeab0f..6ab07c11f 100644 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -1051,25 +1051,19 @@ def test_version_option(): assert result.output.strip() == version.strip() -def test_run(): - result = runner.invoke(annif.cli.cli, ["run", "--help"]) +@mock.patch("connexion.FlaskApp.run") +def test_run(run): + result = runner.invoke(annif.cli.cli, ["run"]) assert not result.exception assert result.exit_code == 0 - assert "Run a local development server." in result.output - - -def test_routes_with_flask_app(): - # When using plain Flask only the static endpoint exists - result = runner.invoke(annif.cli.cli, ["routes"]) - assert re.search(r"static\s+GET\s+\/static\/\", result.output) - assert not re.search(r"app.home\s+GET\s+\/", result.output) + assert run.called -def test_routes_with_connexion_app(): - # When using Connexion all endpoints exist - result = os.popen("python annif/cli.py routes").read() - assert re.search(r"static\s+GET\s+\/static\/", result) - assert re.search(r"app.home\s+GET\s+\/", result) +def test_run_help(): + result = runner.invoke(annif.cli.cli, ["run", "--help"]) + assert not result.exception + assert result.exit_code == 0 + assert "Run Annif in server mode for development." in result.output def test_completion_script_generation(): diff --git a/tests/test_openapi.py b/tests/test_openapi.py index 26e33e4ea..76f33695f 100644 --- a/tests/test_openapi.py +++ b/tests/test_openapi.py @@ -7,37 +7,50 @@ schema = schemathesis.from_path("annif/openapi/annif.yaml") -@schemathesis.check -def check_cors(response, case): - assert response.headers["access-control-allow-origin"] == "*" +@schemathesis.hook("filter_path_parameters") +def filter_path_parameters(context, path_parameters): + # Exclude path parameters containing newline which crashes application + # https://github.com/spec-first/connexion/issues/1908 + if path_parameters is not None and "project_id" in path_parameters: + return "%0A" not in path_parameters["project_id"] + return True @schema.parametrize() @settings(max_examples=10) -def test_openapi_fuzzy(case, app): - response = case.call_wsgi(app) - case.validate_response(response, additional_checks=(check_cors,)) +def test_openapi_fuzzy(case, cxapp): + response = case.call_asgi(cxapp) + case.validate_response(response) @pytest.mark.slow @schema.parametrize(endpoint="/v1/projects/{project_id}") @settings(max_examples=50) -def test_openapi_fuzzy_target_dummy_fi(case, app): +def test_openapi_fuzzy_target_dummy_fi(case, cxapp): case.path_parameters = {"project_id": "dummy-fi"} - response = case.call_wsgi(app) + response = case.call_asgi(cxapp) case.validate_response(response) +def test_openapi_cors(app_client): + # test that the service supports CORS by simulating a cross-origin request + app_client.headers = {"Origin": "http://somedomain.com"} + req = app_client.get( + "http://localhost:8000/v1/projects", + ) + assert req.headers["access-control-allow-origin"] == "*" + + def test_openapi_list_projects(app_client): req = app_client.get("http://localhost:8000/v1/projects") assert req.status_code == 200 - assert "projects" in req.get_json() + assert "projects" in req.json() def test_openapi_show_project(app_client): req = app_client.get("http://localhost:8000/v1/projects/dummy-fi") assert req.status_code == 200 - assert req.get_json()["project_id"] == "dummy-fi" + assert req.json()["project_id"] == "dummy-fi" def test_openapi_show_project_nonexistent(app_client): @@ -51,7 +64,7 @@ def test_openapi_suggest(app_client): "http://localhost:8000/v1/projects/dummy-fi/suggest", data=data ) assert req.status_code == 200 - assert "results" in req.get_json() + assert "results" in req.json() def test_openapi_suggest_nonexistent(app_client): @@ -76,7 +89,7 @@ def test_openapi_suggest_batch(app_client): "http://localhost:8000/v1/projects/dummy-fi/suggest-batch", json=data ) assert req.status_code == 200 - body = req.get_json() + body = req.json() assert len(body) == 32 assert body[0]["results"][0]["label"] == "dummy-fi" @@ -87,7 +100,7 @@ def test_openapi_suggest_batch_too_many_documents(app_client): "http://localhost:8000/v1/projects/dummy-fi/suggest-batch", json=data ) assert req.status_code == 400 - assert req.get_json()["detail"] == "too many items - 'documents'" + assert req.json()["detail"] == "too many items - 'documents'" def test_openapi_learn(app_client): diff --git a/tests/test_project.py b/tests/test_project.py index c2258ceb4..6f1d55d62 100644 --- a/tests/test_project.py +++ b/tests/test_project.py @@ -132,10 +132,10 @@ def test_get_project_default_params_fasttext(registry): def test_get_project_invalid_config_file(): - app = annif.create_app( + cxapp = annif.create_app( config_name="annif.default_config.TestingInvalidProjectsConfig" ) - with app.app_context(): + with cxapp.app.app_context(): with pytest.raises(ConfigurationException): annif.registry.get_project("duplicatedvocab") @@ -315,23 +315,23 @@ def test_project_initialized_with_selection(app_with_initialize_fi_projects): def test_project_file_not_found(): - app = annif.create_app(config_name="annif.default_config.TestingNoProjectsConfig") - with app.app_context(): + cxapp = annif.create_app(config_name="annif.default_config.TestingNoProjectsConfig") + with cxapp.app.app_context(): with pytest.raises(ValueError): annif.registry.get_project("dummy-en") def test_project_file_toml(): - app = annif.create_app(config_name="annif.default_config.TestingTOMLConfig") - with app.app_context(): + cxapp = annif.create_app(config_name="annif.default_config.TestingTOMLConfig") + with cxapp.app.app_context(): assert len(annif.registry.get_projects()) == 2 assert annif.registry.get_project("dummy-fi-toml").project_id == "dummy-fi-toml" assert annif.registry.get_project("dummy-en-toml").project_id == "dummy-en-toml" def test_project_directory(): - app = annif.create_app(config_name="annif.default_config.TestingDirectoryConfig") - with app.app_context(): + cxapp = annif.create_app(config_name="annif.default_config.TestingDirectoryConfig") + with cxapp.app.app_context(): assert len(annif.registry.get_projects()) == 18 + 2 assert annif.registry.get_project("dummy-fi").project_id == "dummy-fi" assert annif.registry.get_project("dummy-fi-toml").project_id == "dummy-fi-toml" diff --git a/tests/test_rest.py b/tests/test_rest.py index e56a24b21..c905fc1de 100644 --- a/tests/test_rest.py +++ b/tests/test_rest.py @@ -7,7 +7,7 @@ def test_rest_list_projects(app): with app.app_context(): - result = annif.rest.list_projects() + result = annif.rest.list_projects()[0] project_ids = [proj["project_id"] for proj in result["projects"]] # public project should be returned assert "dummy-fi" in project_ids @@ -21,7 +21,7 @@ def test_rest_list_projects(app): def test_rest_show_info(app): with app.app_context(): - result = annif.rest.show_info() + result = annif.rest.show_info()[0] version = importlib.metadata.version("annif") assert result == {"title": "Annif REST API", "version": version} @@ -29,14 +29,14 @@ def test_rest_show_info(app): def test_rest_show_project_public(app): # public projects should be accessible via REST with app.app_context(): - result = annif.rest.show_project("dummy-fi") + result = annif.rest.show_project("dummy-fi")[0] assert result["project_id"] == "dummy-fi" def test_rest_show_project_hidden(app): # hidden projects should be accessible if you know the project id with app.app_context(): - result = annif.rest.show_project("dummy-en") + result = annif.rest.show_project("dummy-en")[0] assert result["project_id"] == "dummy-en" @@ -58,7 +58,7 @@ def test_rest_suggest_public(app): with app.app_context(): result = annif.rest.suggest( "dummy-fi", {"text": "example text", "limit": 10, "threshold": 0.0} - ) + )[0] assert "results" in result @@ -67,7 +67,7 @@ def test_rest_suggest_hidden(app): with app.app_context(): result = annif.rest.suggest( "dummy-en", {"text": "example text", "limit": 10, "threshold": 0.0} - ) + )[0] assert "results" in result @@ -101,7 +101,7 @@ def test_rest_suggest_with_language_override(app): result = annif.rest.suggest( "dummy-vocablang", {"text": "example text", "limit": 10, "threshold": 0.0, "language": "en"}, - ) + )[0] assert result["results"][0]["label"] == "dummy" @@ -120,7 +120,7 @@ def test_rest_suggest_with_different_vocab_language(app): with app.app_context(): result = annif.rest.suggest( "dummy-vocablang", {"text": "example text", "limit": 10, "threshold": 0.0} - ) + )[0] assert result["results"][0]["label"] == "dummy-fi" @@ -128,7 +128,7 @@ def test_rest_suggest_with_notations(app): with app.app_context(): result = annif.rest.suggest( "dummy-fi", {"text": "example text", "limit": 10, "threshold": 0.0} - ) + )[0] assert result["results"][0]["notation"] is None @@ -136,7 +136,7 @@ def test_rest_suggest_batch_one_doc(app): with app.app_context(): result = annif.rest.suggest_batch( "dummy-fi", {"documents": [{"text": "example text"}]} - ) + )[0] assert len(result) == 1 assert result[0]["results"][0]["label"] == "dummy-fi" assert result[0]["document_id"] is None @@ -147,7 +147,7 @@ def test_rest_suggest_batch_one_doc_with_id(app): result = annif.rest.suggest_batch( "dummy-fi", {"documents": [{"text": "example text", "document_id": "doc-0"}]}, - ) + )[0] assert len(result) == 1 assert result[0]["results"][0]["label"] == "dummy-fi" assert result[0]["document_id"] == "doc-0" @@ -163,7 +163,7 @@ def test_rest_suggest_batch_two_docs(app): {"text": "another example text"}, ] }, - ) + )[0] assert len(result) == 2 assert result[1]["results"][0]["label"] == "dummy-fi" @@ -176,7 +176,7 @@ def test_rest_suggest_batch_with_language_override(app): "documents": [{"text": "example text"}], }, language="en", - ) + )[0] assert result[0]["results"][0]["label"] == "dummy" @@ -188,14 +188,18 @@ def test_rest_suggest_batch_with_limit_override(app): "documents": [{"text": "example text"}], }, limit=0, - ) + )[0] assert len(result[0]["results"]) == 0 def test_rest_learn_empty(app): with app.app_context(): response = annif.rest.learn("dummy-en", []) - assert response == (None, 204) # success, no output + assert response == ( + None, + 204, + {"Content-Type": "application/json"}, + ) # success, no output def test_rest_learn(app): @@ -207,11 +211,15 @@ def test_rest_learn(app): ] with app.app_context(): response = annif.rest.learn("dummy-en", documents) - assert response == (None, 204) # success, no output + assert response == ( + None, + 204, + {"Content-Type": "application/json"}, + ) # success, no output result = annif.rest.suggest( "dummy-en", {"text": "example text", "limit": 10, "threshold": 0.0} - ) + )[0] assert "results" in result assert result["results"][0]["uri"] == "http://example.org/none" assert result["results"][0]["label"] == "none"