From cd864d34dc3e45da9a89f8045489e3318e7a5f16 Mon Sep 17 00:00:00 2001 From: Andrei Neagu <5694077+GitHK@users.noreply.github.com> Date: Tue, 10 May 2022 20:27:27 +0200 Subject: [PATCH] =?UTF-8?q?=F0=9F=90=9B=20Fixes=20issues=20with=20dy-sidec?= =?UTF-8?q?ar=20removal=20by=20director-v2=20(#3029)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * extended API to raise errors * fix service removal * pylint * fixed method spec * downgrading log message * refactor error handling and propagation * pylint * refactor to simplify error handling * cleanup * remove unsued * cleanup * fix codestyle Co-authored-by: Andrei Neagu --- .../node_ports_common/exceptions.py | 1 + .../modules/dynamic_sidecar/client_api.py | 6 ++++ .../modules/dynamic_sidecar/docker_api.py | 2 +- .../modules/dynamic_sidecar/errors.py | 9 +++++ .../dynamic_sidecar/scheduler/events.py | 25 +++++++++++--- services/dynamic-sidecar/.env-devel | 6 ++++ services/dynamic-sidecar/openapi.json | 3 ++ .../api/containers_extension.py | 5 +++ .../core/application.py | 4 ++- .../core/error_handlers.py | 22 +++++++++++-- .../tests/unit/test_api_containers.py | 33 +++++++++++++++++++ .../dynamic-sidecar/tests/unit/test_utils.py | 2 ++ 12 files changed, 109 insertions(+), 9 deletions(-) diff --git a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/exceptions.py b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/exceptions.py index 0c2ba24d68e..763593486b9 100644 --- a/packages/simcore-sdk/src/simcore_sdk/node_ports_common/exceptions.py +++ b/packages/simcore-sdk/src/simcore_sdk/node_ports_common/exceptions.py @@ -122,6 +122,7 @@ class NodeNotFound(NodeportsException): """The given node_uuid was not found""" def __init__(self, node_uuid): + self.node_uuid = node_uuid super().__init__(f"the node id {node_uuid} was not found") diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py index 032d85f0e00..94f2556eded 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/client_api.py @@ -17,6 +17,7 @@ from .errors import ( DynamicSidecarUnexpectedResponseStatus, EntrypointContainerNotFoundError, + NodeportsDidNotFindNodeError, ) # PC -> SAN improvements to discuss @@ -246,6 +247,11 @@ async def service_push_output_ports( response = await self._client.post( url, json=port_keys, timeout=self._save_restore_timeout ) + if response.status_code == status.HTTP_404_NOT_FOUND: + json_error = response.json() + if json_error.get("code") == "dynamic_sidecar.nodeports.node_not_found": + raise NodeportsDidNotFindNodeError(node_uuid=json_error["node_uuid"]) + if response.status_code != status.HTTP_204_NO_CONTENT: raise DynamicSidecarUnexpectedResponseStatus(response, "output ports push") diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py index 96df423a0ff..49344d3954a 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/docker_api.py @@ -96,7 +96,7 @@ async def docker_client() -> AsyncIterator[aiodocker.docker.Docker]: except aiodocker.exceptions.DockerError as e: message = "Unexpected error from docker client" log_message = f"{message} {e.message}\n{traceback.format_exc()}" - log.warning(log_message) + log.info(log_message) raise GenericDockerError(message, e) from e finally: if client is not None: diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/errors.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/errors.py index 7370192ddd8..00078fa8baf 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/errors.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/errors.py @@ -3,6 +3,7 @@ from aiodocker.exceptions import DockerError from httpx import Response from models_library.projects_nodes import NodeID +from pydantic.errors import PydanticErrorMixin from ...core.errors import DirectorException @@ -52,3 +53,11 @@ def __init__(self, response: Response, msg: Optional[str] = None): ) super().__init__(message) self.response = response + + +class NodeportsDidNotFindNodeError(PydanticErrorMixin, DirectorException): + code = "dynamic_scheduler.output_ports_pulling.node_not_found" + msg_template = ( + "Could not find node '{node_uuid}' in the database. Did not upload data to S3, " + "most likely due to service being removed from the study." + ) diff --git a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py index 14296aa644e..02033647734 100644 --- a/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py +++ b/services/director-v2/src/simcore_service_director_v2/modules/dynamic_sidecar/scheduler/events.py @@ -27,6 +27,7 @@ from .._namepsace import get_compose_namespace from ..client_api import DynamicSidecarClient, get_dynamic_sidecar_client from ..docker_api import ( + are_all_services_present, create_network, create_service_and_get_id, get_node_id_from_task_for_service, @@ -49,6 +50,7 @@ DynamicSidecarUnexpectedResponseStatus, EntrypointContainerNotFoundError, GenericDockerError, + NodeportsDidNotFindNodeError, ) from ..volumes_resolver import DynamicSidecarVolumesPathsResolver from .abc import DynamicSchedulerEvent @@ -472,15 +474,23 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: logger.warning( "Could not contact dynamic-sidecar to begin destruction of %s\n%s", scheduler_data.service_name, - str(e), + f"{e}", ) - app_settings: AppSettings = app.state.settings dynamic_sidecar_settings: DynamicSidecarSettings = ( app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR ) - if scheduler_data.dynamic_sidecar.service_removal_state.can_save: + # only try to save the status if : + # - the dynamic-sidecar has finished booting correctly + # - it is requested to save the state + if ( + scheduler_data.dynamic_sidecar.service_removal_state.can_save + and await are_all_services_present( + node_uuid=scheduler_data.node_uuid, + dynamic_sidecar_settings=app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR, + ) + ): dynamic_sidecar_client = get_dynamic_sidecar_client(app) dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint @@ -501,7 +511,12 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: dynamic_sidecar_endpoint, ) ) - await logged_gather(*tasks) + + try: + await logged_gather(*tasks) + except NodeportsDidNotFindNodeError as err: + logger.warning("%s", f"{err}") + logger.info("Ports data pushed by dynamic-sidecar") # NOTE: ANE: need to use more specific exception here except Exception as e: # pylint: disable=broad-except @@ -511,7 +526,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None: "state and upload outputs %s\n%s" ), scheduler_data.service_name, - str(e), + f"{e}", ) # ensure dynamic-sidecar does not get removed # user data can be manually saved and manual diff --git a/services/dynamic-sidecar/.env-devel b/services/dynamic-sidecar/.env-devel index 493d7fd6afe..48b2789e229 100644 --- a/services/dynamic-sidecar/.env-devel +++ b/services/dynamic-sidecar/.env-devel @@ -22,3 +22,9 @@ REGISTRY_AUTH=false REGISTRY_USER=test REGISTRY_PW=test REGISTRY_SSL=false + +S3_ENDPOINT=MINIO +S3_ACCESS_KEY=mocked +S3_SECRET_KEY=mocked +S3_BUCKET_NAME=mocked +R_CLONE_PROVIDER=MINIO \ No newline at end of file diff --git a/services/dynamic-sidecar/openapi.json b/services/dynamic-sidecar/openapi.json index 518bffd708a..5fda174802a 100644 --- a/services/dynamic-sidecar/openapi.json +++ b/services/dynamic-sidecar/openapi.json @@ -510,6 +510,9 @@ "204": { "description": "Successful Response" }, + "404": { + "description": "Could not find node_uuid in the database" + }, "422": { "description": "Validation Error", "content": { diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_extension.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_extension.py index 20e72fb1741..163f80c027a 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_extension.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/api/containers_extension.py @@ -195,6 +195,11 @@ async def pull_output_ports( summary="Push output ports data", response_class=Response, status_code=status.HTTP_204_NO_CONTENT, + responses={ + status.HTTP_404_NOT_FOUND: { + "description": "Could not find node_uuid in the database" + } + }, ) async def push_output_ports( port_keys: Optional[List[str]] = None, diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py index 800a36294c4..d13b5a24f05 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/application.py @@ -3,6 +3,7 @@ from fastapi import FastAPI from servicelib.fastapi.openapi import override_fastapi_openapi_method +from simcore_sdk.node_ports_common.exceptions import NodeNotFound from .._meta import API_VTAG, __version__ from ..api import main_router @@ -10,7 +11,7 @@ from ..models.schemas.application_health import ApplicationHealth from ..modules.directory_watcher import setup_directory_watcher from .docker_logs import setup_background_log_fetcher -from .error_handlers import http_error_handler +from .error_handlers import http_error_handler, node_not_found_error_handler from .errors import BaseDynamicSidecarError from .rabbitmq import setup_rabbitmq from .remote_debug import setup as remote_debug_setup @@ -78,6 +79,7 @@ def assemble_application() -> FastAPI: application.include_router(main_router) # error handlers + application.add_exception_handler(NodeNotFound, node_not_found_error_handler) application.add_exception_handler(BaseDynamicSidecarError, http_error_handler) # also sets up mounted_volumes diff --git a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/error_handlers.py b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/error_handlers.py index 43677610581..8fbba2fb1c8 100644 --- a/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/error_handlers.py +++ b/services/dynamic-sidecar/src/simcore_service_dynamic_sidecar/core/error_handlers.py @@ -1,11 +1,29 @@ +from fastapi import status from fastapi.encoders import jsonable_encoder +from simcore_sdk.node_ports_common.exceptions import NodeNotFound from starlette.requests import Request from starlette.responses import JSONResponse from .errors import BaseDynamicSidecarError -async def http_error_handler(_: Request, exc: BaseDynamicSidecarError) -> JSONResponse: +async def http_error_handler( + _: Request, exception: BaseDynamicSidecarError +) -> JSONResponse: return JSONResponse( - content=jsonable_encoder({"errors": [exc.message]}), status_code=exc.status + content=jsonable_encoder({"errors": [exception.message]}), + status_code=exception.status, + ) + + +async def node_not_found_error_handler( + _: Request, exception: NodeNotFound +) -> JSONResponse: + error_fields = dict( + code="dynamic_sidecar.nodeports.node_not_found", + message=f"{exception}", + node_uuid=f"{exception.node_uuid}", + ) + return JSONResponse( + content=jsonable_encoder(error_fields), status_code=status.HTTP_404_NOT_FOUND ) diff --git a/services/dynamic-sidecar/tests/unit/test_api_containers.py b/services/dynamic-sidecar/tests/unit/test_api_containers.py index cf03f717c2e..c8f850a7183 100644 --- a/services/dynamic-sidecar/tests/unit/test_api_containers.py +++ b/services/dynamic-sidecar/tests/unit/test_api_containers.py @@ -20,6 +20,7 @@ from fastapi import FastAPI, status from models_library.services import ServiceOutput from pytest_mock.plugin import MockerFixture +from simcore_sdk.node_ports_common.exceptions import NodeNotFound from simcore_service_dynamic_sidecar._meta import API_VTAG from simcore_service_dynamic_sidecar.core.settings import DynamicSidecarSettings from simcore_service_dynamic_sidecar.core.shared_handlers import ( @@ -180,6 +181,22 @@ def mock_nodeports(mocker: MockerFixture) -> None: ) +@pytest.fixture +def missing_node_uuid(faker: faker.Faker) -> str: + return faker.uuid4() + + +@pytest.fixture +def mock_node_missing(mocker: MockerFixture, missing_node_uuid: str) -> None: + async def _mocked(*args, **kwargs) -> None: + raise NodeNotFound(missing_node_uuid) + + mocker.patch( + "simcore_service_dynamic_sidecar.modules.nodeports.upload_outputs", + side_effect=_mocked, + ) + + @pytest.fixture def mock_data_manager(mocker: MockerFixture) -> None: mocker.patch( @@ -602,6 +619,22 @@ async def test_container_push_output_ports( assert response.text == "" +async def test_container_push_output_ports_missing_node( + test_client: TestClient, + mock_port_keys: List[str], + missing_node_uuid: str, + mock_node_missing: None, +) -> None: + response = await test_client.post( + f"/{API_VTAG}/containers/ports/outputs:push", json=mock_port_keys + ) + assert response.status_code == status.HTTP_404_NOT_FOUND, response.text + error_detail = response.json() + assert error_detail["message"] == f"the node id {missing_node_uuid} was not found" + assert error_detail["code"] == "dynamic_sidecar.nodeports.node_not_found" + assert error_detail["node_uuid"] == missing_node_uuid + + def _get_entrypoint_container_name( test_client: TestClient, dynamic_sidecar_network_name: str ) -> str: diff --git a/services/dynamic-sidecar/tests/unit/test_utils.py b/services/dynamic-sidecar/tests/unit/test_utils.py index 270ab8df73f..d6757838531 100644 --- a/services/dynamic-sidecar/tests/unit/test_utils.py +++ b/services/dynamic-sidecar/tests/unit/test_utils.py @@ -1,4 +1,6 @@ # pylint: disable=redefined-outer-name +# pylint: disable=expression-not-assigned + import pytest from _pytest.monkeypatch import MonkeyPatch from settings_library.docker_registry import RegistrySettings