Skip to content

Commit

Permalink
🐛 Fixes issues with dy-sidecar removal by director-v2 (#3029)
Browse files Browse the repository at this point in the history
* extended API to raise errors

* fix service removal

* pylint

* fixed method spec

* downgrading log message

* refactor error handling and propagation

* pylint

* refactor to simplify error handling

* cleanup

* remove unsued

* cleanup

* fix codestyle

Co-authored-by: Andrei Neagu <[email protected]>
  • Loading branch information
GitHK and Andrei Neagu authored May 10, 2022
1 parent 375cbd3 commit cd864d3
Show file tree
Hide file tree
Showing 12 changed files with 109 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -122,6 +122,7 @@ class NodeNotFound(NodeportsException):
"""The given node_uuid was not found"""

def __init__(self, node_uuid):
self.node_uuid = node_uuid
super().__init__(f"the node id {node_uuid} was not found")


Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from .errors import (
DynamicSidecarUnexpectedResponseStatus,
EntrypointContainerNotFoundError,
NodeportsDidNotFindNodeError,
)

# PC -> SAN improvements to discuss
Expand Down Expand Up @@ -246,6 +247,11 @@ async def service_push_output_ports(
response = await self._client.post(
url, json=port_keys, timeout=self._save_restore_timeout
)
if response.status_code == status.HTTP_404_NOT_FOUND:
json_error = response.json()
if json_error.get("code") == "dynamic_sidecar.nodeports.node_not_found":
raise NodeportsDidNotFindNodeError(node_uuid=json_error["node_uuid"])

if response.status_code != status.HTTP_204_NO_CONTENT:
raise DynamicSidecarUnexpectedResponseStatus(response, "output ports push")

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ async def docker_client() -> AsyncIterator[aiodocker.docker.Docker]:
except aiodocker.exceptions.DockerError as e:
message = "Unexpected error from docker client"
log_message = f"{message} {e.message}\n{traceback.format_exc()}"
log.warning(log_message)
log.info(log_message)
raise GenericDockerError(message, e) from e
finally:
if client is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from aiodocker.exceptions import DockerError
from httpx import Response
from models_library.projects_nodes import NodeID
from pydantic.errors import PydanticErrorMixin

from ...core.errors import DirectorException

Expand Down Expand Up @@ -52,3 +53,11 @@ def __init__(self, response: Response, msg: Optional[str] = None):
)
super().__init__(message)
self.response = response


class NodeportsDidNotFindNodeError(PydanticErrorMixin, DirectorException):
code = "dynamic_scheduler.output_ports_pulling.node_not_found"
msg_template = (
"Could not find node '{node_uuid}' in the database. Did not upload data to S3, "
"most likely due to service being removed from the study."
)
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from .._namepsace import get_compose_namespace
from ..client_api import DynamicSidecarClient, get_dynamic_sidecar_client
from ..docker_api import (
are_all_services_present,
create_network,
create_service_and_get_id,
get_node_id_from_task_for_service,
Expand All @@ -49,6 +50,7 @@
DynamicSidecarUnexpectedResponseStatus,
EntrypointContainerNotFoundError,
GenericDockerError,
NodeportsDidNotFindNodeError,
)
from ..volumes_resolver import DynamicSidecarVolumesPathsResolver
from .abc import DynamicSchedulerEvent
Expand Down Expand Up @@ -472,15 +474,23 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
logger.warning(
"Could not contact dynamic-sidecar to begin destruction of %s\n%s",
scheduler_data.service_name,
str(e),
f"{e}",
)

app_settings: AppSettings = app.state.settings
dynamic_sidecar_settings: DynamicSidecarSettings = (
app_settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR
)

if scheduler_data.dynamic_sidecar.service_removal_state.can_save:
# only try to save the status if :
# - the dynamic-sidecar has finished booting correctly
# - it is requested to save the state
if (
scheduler_data.dynamic_sidecar.service_removal_state.can_save
and await are_all_services_present(
node_uuid=scheduler_data.node_uuid,
dynamic_sidecar_settings=app.state.settings.DYNAMIC_SERVICES.DYNAMIC_SIDECAR,
)
):
dynamic_sidecar_client = get_dynamic_sidecar_client(app)
dynamic_sidecar_endpoint = scheduler_data.dynamic_sidecar.endpoint

Expand All @@ -501,7 +511,12 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
dynamic_sidecar_endpoint,
)
)
await logged_gather(*tasks)

try:
await logged_gather(*tasks)
except NodeportsDidNotFindNodeError as err:
logger.warning("%s", f"{err}")

logger.info("Ports data pushed by dynamic-sidecar")
# NOTE: ANE: need to use more specific exception here
except Exception as e: # pylint: disable=broad-except
Expand All @@ -511,7 +526,7 @@ async def action(cls, app: FastAPI, scheduler_data: SchedulerData) -> None:
"state and upload outputs %s\n%s"
),
scheduler_data.service_name,
str(e),
f"{e}",
)
# ensure dynamic-sidecar does not get removed
# user data can be manually saved and manual
Expand Down
6 changes: 6 additions & 0 deletions services/dynamic-sidecar/.env-devel
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,9 @@ REGISTRY_AUTH=false
REGISTRY_USER=test
REGISTRY_PW=test
REGISTRY_SSL=false

S3_ENDPOINT=MINIO
S3_ACCESS_KEY=mocked
S3_SECRET_KEY=mocked
S3_BUCKET_NAME=mocked
R_CLONE_PROVIDER=MINIO
3 changes: 3 additions & 0 deletions services/dynamic-sidecar/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -510,6 +510,9 @@
"204": {
"description": "Successful Response"
},
"404": {
"description": "Could not find node_uuid in the database"
},
"422": {
"description": "Validation Error",
"content": {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,11 @@ async def pull_output_ports(
summary="Push output ports data",
response_class=Response,
status_code=status.HTTP_204_NO_CONTENT,
responses={
status.HTTP_404_NOT_FOUND: {
"description": "Could not find node_uuid in the database"
}
},
)
async def push_output_ports(
port_keys: Optional[List[str]] = None,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,15 @@

from fastapi import FastAPI
from servicelib.fastapi.openapi import override_fastapi_openapi_method
from simcore_sdk.node_ports_common.exceptions import NodeNotFound

from .._meta import API_VTAG, __version__
from ..api import main_router
from ..models.domains.shared_store import SharedStore
from ..models.schemas.application_health import ApplicationHealth
from ..modules.directory_watcher import setup_directory_watcher
from .docker_logs import setup_background_log_fetcher
from .error_handlers import http_error_handler
from .error_handlers import http_error_handler, node_not_found_error_handler
from .errors import BaseDynamicSidecarError
from .rabbitmq import setup_rabbitmq
from .remote_debug import setup as remote_debug_setup
Expand Down Expand Up @@ -78,6 +79,7 @@ def assemble_application() -> FastAPI:
application.include_router(main_router)

# error handlers
application.add_exception_handler(NodeNotFound, node_not_found_error_handler)
application.add_exception_handler(BaseDynamicSidecarError, http_error_handler)

# also sets up mounted_volumes
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,29 @@
from fastapi import status
from fastapi.encoders import jsonable_encoder
from simcore_sdk.node_ports_common.exceptions import NodeNotFound
from starlette.requests import Request
from starlette.responses import JSONResponse

from .errors import BaseDynamicSidecarError


async def http_error_handler(_: Request, exc: BaseDynamicSidecarError) -> JSONResponse:
async def http_error_handler(
_: Request, exception: BaseDynamicSidecarError
) -> JSONResponse:
return JSONResponse(
content=jsonable_encoder({"errors": [exc.message]}), status_code=exc.status
content=jsonable_encoder({"errors": [exception.message]}),
status_code=exception.status,
)


async def node_not_found_error_handler(
_: Request, exception: NodeNotFound
) -> JSONResponse:
error_fields = dict(
code="dynamic_sidecar.nodeports.node_not_found",
message=f"{exception}",
node_uuid=f"{exception.node_uuid}",
)
return JSONResponse(
content=jsonable_encoder(error_fields), status_code=status.HTTP_404_NOT_FOUND
)
33 changes: 33 additions & 0 deletions services/dynamic-sidecar/tests/unit/test_api_containers.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from fastapi import FastAPI, status
from models_library.services import ServiceOutput
from pytest_mock.plugin import MockerFixture
from simcore_sdk.node_ports_common.exceptions import NodeNotFound
from simcore_service_dynamic_sidecar._meta import API_VTAG
from simcore_service_dynamic_sidecar.core.settings import DynamicSidecarSettings
from simcore_service_dynamic_sidecar.core.shared_handlers import (
Expand Down Expand Up @@ -180,6 +181,22 @@ def mock_nodeports(mocker: MockerFixture) -> None:
)


@pytest.fixture
def missing_node_uuid(faker: faker.Faker) -> str:
return faker.uuid4()


@pytest.fixture
def mock_node_missing(mocker: MockerFixture, missing_node_uuid: str) -> None:
async def _mocked(*args, **kwargs) -> None:
raise NodeNotFound(missing_node_uuid)

mocker.patch(
"simcore_service_dynamic_sidecar.modules.nodeports.upload_outputs",
side_effect=_mocked,
)


@pytest.fixture
def mock_data_manager(mocker: MockerFixture) -> None:
mocker.patch(
Expand Down Expand Up @@ -602,6 +619,22 @@ async def test_container_push_output_ports(
assert response.text == ""


async def test_container_push_output_ports_missing_node(
test_client: TestClient,
mock_port_keys: List[str],
missing_node_uuid: str,
mock_node_missing: None,
) -> None:
response = await test_client.post(
f"/{API_VTAG}/containers/ports/outputs:push", json=mock_port_keys
)
assert response.status_code == status.HTTP_404_NOT_FOUND, response.text
error_detail = response.json()
assert error_detail["message"] == f"the node id {missing_node_uuid} was not found"
assert error_detail["code"] == "dynamic_sidecar.nodeports.node_not_found"
assert error_detail["node_uuid"] == missing_node_uuid


def _get_entrypoint_container_name(
test_client: TestClient, dynamic_sidecar_network_name: str
) -> str:
Expand Down
2 changes: 2 additions & 0 deletions services/dynamic-sidecar/tests/unit/test_utils.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
# pylint: disable=redefined-outer-name
# pylint: disable=expression-not-assigned

import pytest
from _pytest.monkeypatch import MonkeyPatch
from settings_library.docker_registry import RegistrySettings
Expand Down

0 comments on commit cd864d3

Please sign in to comment.