inmanta · wouterdb · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024 · Sep 13, 2024
diff --git a/changelogs/unreleased/8010-agent-scheduler_2.yml b/changelogs/unreleased/8010-agent-scheduler_2.yml
@@ -0,0 +1,4 @@
+description: Add testcases for agent and server
+issue-nr: 8010
+change-type: patch
+destination-branches: [master]
diff --git a/src/inmanta/agent/executor.py b/src/inmanta/agent/executor.py
@@ -40,6 +40,7 @@
 from inmanta import const
 from inmanta.agent import config as cfg
 from inmanta.agent import resourcepool
+from inmanta.const import ResourceState
 from inmanta.data.model import PipConfig, ResourceIdStr, ResourceType, ResourceVersionIdStr
 from inmanta.env import PythonEnvironment
 from inmanta.loader import ModuleSource
@@ -486,7 +487,7 @@ async def execute(
         gid: uuid.UUID,
         resource_details: ResourceDetails,
         reason: str,
-    ) -> None:
+    ) -> ResourceState:
         """
         Perform the actual deployment of the resource by calling the loaded handler code
 

diff --git a/src/inmanta/agent/forking_executor.py b/src/inmanta/agent/forking_executor.py
@@ -480,7 +480,7 @@ async def call(self, context: ExecutorContext) -> None:
         await context.get(self.agent_name).dry_run(self.resources, self.dry_run_id)
 
 
-class ExecuteCommand(inmanta.protocol.ipc_light.IPCMethod[ExecutorContext, None]):
+class ExecuteCommand(inmanta.protocol.ipc_light.IPCMethod[ExecutorContext, const.ResourceState]):
     """Run a deploy in an executor"""
 
     def __init__(
@@ -495,8 +495,8 @@ def __init__(
         self.resource_details = resource_details
         self.reason = reason
 
-    async def call(self, context: ExecutorContext) -> None:
-        await context.get(self.agent_name).execute(self.gid, self.resource_details, self.reason)
+    async def call(self, context: ExecutorContext) -> const.ResourceState:
+        return await context.get(self.agent_name).execute(self.gid, self.resource_details, self.reason)
 
 
 class FactsCommand(inmanta.protocol.ipc_light.IPCMethod[ExecutorContext, inmanta.types.Apireturn]):
@@ -803,8 +803,8 @@ async def execute(
         gid: uuid.UUID,
         resource_details: "inmanta.agent.executor.ResourceDetails",
         reason: str,
-    ) -> None:
-        await self.call(ExecuteCommand(self.id.agent_name, gid, resource_details, reason))
+    ) -> const.ResourceState:
+        return await self.call(ExecuteCommand(self.id.agent_name, gid, resource_details, reason))
 
     async def get_facts(self, resource: "inmanta.agent.executor.ResourceDetails") -> inmanta.types.Apireturn:
         return await self.call(FactsCommand(self.id.agent_name, resource))

diff --git a/src/inmanta/agent/in_process_executor.py b/src/inmanta/agent/in_process_executor.py
@@ -32,7 +32,7 @@
 from inmanta.agent import executor, handler
 from inmanta.agent.executor import FailedResources, ResourceDetails
 from inmanta.agent.handler import HandlerAPI, SkipResource
-from inmanta.const import ParameterSource
+from inmanta.const import ParameterSource, ResourceState
 from inmanta.data.model import AttributeStateChange, ResourceIdStr, ResourceVersionIdStr
 from inmanta.loader import CodeLoader
 from inmanta.resources import Id, Resource
@@ -257,11 +257,11 @@ async def execute(
         gid: uuid.UUID,
         resource_details: ResourceDetails,
         reason: str,
-    ) -> None:
+    ) -> ResourceState:
         try:
             resource: Resource | None = await self.deserialize(resource_details, const.ResourceAction.deploy)
         except Exception:
-            return
+            return const.ResourceState.unavailable
         assert resource is not None
         ctx = handler.HandlerContext(resource, logger=self.logger)
         ctx.debug(
@@ -279,7 +279,7 @@ async def execute(
         except Exception:
             ctx.set_status(const.ResourceState.failed)
             ctx.exception("Failed to report the start of the deployment to the server")
-            return
+            return const.ResourceState.failed
 
         async with self.activity_lock:
             with self._cache:
@@ -298,6 +298,11 @@ async def execute(
                 ctx.error("Failed to send facts to the server %s", set_fact_response.result)
 
         await self._report_resource_deploy_done(resource_details, ctx)
+        # context should not be none at this point
+        if ctx.status is None:
+            ctx.error("Status not set, should not happen")
+            return const.ResourceState.failed
+        return ctx.status
 
     async def dry_run(
         self,

diff --git a/src/inmanta/app.py b/src/inmanta/app.py
@@ -64,6 +64,7 @@
 from inmanta.export import cfg_env
 from inmanta.logging import InmantaLoggerConfig, LoggerMode, LoggerModeManager, _is_on_tty
 from inmanta.server.bootloader import InmantaBootloader
+from inmanta.server.services.databaseservice import server_db_connect
 from inmanta.signals import safe_shutdown, setup_signal_handlers
 from inmanta.util import get_compiler_version
 from inmanta.warnings import WarningsManager
@@ -158,11 +159,16 @@ def start_scheduler(options: argparse.Namespace) -> None:
         AsyncHTTPClient.configure(None, max_clients=max_clients)
 
     tracing.configure_logfire("agent_rs")
+
     util.ensure_event_loop()
     a = agent_new.Agent()
 
+    async def start() -> None:
+        await server_db_connect()
+        await a.start()
+
     setup_signal_handlers(a.stop)
-    IOLoop.current().add_callback(a.start)
+    IOLoop.current().add_callback(start)
     IOLoop.current().start()
     LOGGER.info("Agent with Resource scheduler Shutdown complete")
 

diff --git a/src/inmanta/deploy/scheduler.py b/src/inmanta/deploy/scheduler.py
@@ -17,22 +17,19 @@
 """
 
 import asyncio
-import datetime
 import logging
-import traceback
 import uuid
 from collections.abc import Mapping, Set
-from typing import Any, Optional
+from typing import Any
 
-from inmanta import const, data, resources
+from inmanta import data, resources
 from inmanta.agent import executor
 from inmanta.agent.code_manager import CodeManager
-from inmanta.const import ResourceAction
 from inmanta.data import Resource
 from inmanta.data.model import ResourceIdStr
 from inmanta.deploy import work
 from inmanta.deploy.state import ModelState, ResourceDetails, ResourceStatus
-from inmanta.deploy.work import PoisonPill
+from inmanta.deploy.work import Task
 from inmanta.protocol import Client
 
 LOGGER = logging.getLogger(__name__)
@@ -113,9 +110,7 @@ async def deploy(self) -> None:
         """
         async with self._scheduler_lock:
             # FIXME[#8008]: more efficient access to dirty set by caching it on the ModelState
-            dirty: Set[ResourceIdStr] = {
-                r for r, details in self._state.resource_state.items() if details.status == ResourceStatus.HAS_UPDATE
-            }
+            dirty: Set[ResourceIdStr] = {r for r, details in self._state.resource_state.items() if details.needs_deploy()}
             # FIXME[#8008]: pass in running deploys
             self._work.update_state(ensure_scheduled=dirty, running_deploys=set())
 
@@ -248,115 +243,14 @@ def start_for_agent(self, agent: str) -> None:
         """Start processing for the given agent"""
         self._workers[agent] = asyncio.create_task(self._run_for_agent(agent))
 
-    async def _run_task(self, agent: str, task: work.Task, resource_details: ResourceDetails) -> None:
-        """Run a task"""
-        match task:
-            case work.Deploy():
-                await self.perform_deploy(agent, resource_details)
-            case _:
-                print("Nothing here!")
-
-    async def perform_deploy(self, agent: str, resource_details: ResourceDetails) -> None:
-        """
-        Perform an actual deploy on an agent.
-
-        :param agent:
-        :param resource_details:
-        """
-        # FIXME: WDB to Sander: is the version of the state the correct version?
-        #   It may happen that the set of types no longer matches the version?
-        # FIXME: code loading interface is not nice like this,
-        #   - we may want to track modules per agent, instead of types
-        #   - we may also want to track the module version vs the model version
-        #       as it avoid the problem of fast chanfing model versions
-
-        async def report_deploy_failure(excn: Exception) -> None:
-            res_type = resource_details.id.entity_type
-            log_line = data.LogLine.log(
-                logging.ERROR,
-                "All resources of type `%(res_type)s` failed to load handler code or install handler code "
-                "dependencies: `%(error)s`\n%(traceback)s",
-                res_type=res_type,
-                error=str(excn),
-                traceback="".join(traceback.format_tb(excn.__traceback__)),
-            )
-            await self._client.resource_action_update(
-                tid=self._environment,
-                resource_ids=[resource_details.rvid],
-                action_id=uuid.uuid4(),
-                action=ResourceAction.deploy,
-                started=datetime.datetime.now().astimezone(),
-                finished=datetime.datetime.now().astimezone(),
-                messages=[log_line],
-                status=const.ResourceState.unavailable,
-            )
-
-        # Find code
-        code, invalid_resources = await self._code_manager.get_code(
-            environment=self._environment, version=self._state.version, resource_types=self._state.get_types_for_agent(agent)
-        )
-
-        # Bail out if this failed
-        if resource_details.id.entity_type in invalid_resources:
-            await report_deploy_failure(invalid_resources[resource_details.id.entity_type])
-            return
-
-        # Get executor
-        my_executor: executor.Executor = await self._executor_manager.get_executor(
-            agent_name=agent, agent_uri="NO_URI", code=code
-        )
-        failed_resources = my_executor.failed_resources
-
-        # Bail out if this failed
-        if resource_details.id.entity_type in failed_resources:
-            await report_deploy_failure(failed_resources[resource_details.id.entity_type])
-            return
-
-        # DEPLOY!!!
-        gid = uuid.uuid4()
-        # FIXME: reason argument is not used
-        await my_executor.execute(gid, resource_details, "New Scheduler initiated action")
-
-    async def _work_once(self, agent: str) -> None:
-        task: work.Task = await self._work.agent_queues.queue_get(agent)
-        # FIXME[#8008]: skip and reschedule deploy / refresh-fact task if resource marked as update pending?
-        if isinstance(task, PoisonPill):
-            # wake up and return, queue will be shut down
-            return
-        resource_details: ResourceDetails
-        async with self._scheduler_lock:
-            # fetch resource details atomically under lock
-            try:
-                resource_details = self._state.resources[task.resource]
-            except KeyError:
-                # Stale resource, can simply be dropped.
-                # May occur in rare races between new_version and acquiring the lock we're under here. This race is safe
-                # because of this check, and an intrinsic part of the locking design because it's preferred over wider
-                # locking for performance reasons.
-                return
-
-        await self._run_task(agent, task, resource_details)
-
-        # post-processing
-        match task:
-            case work.Deploy():
-                async with self._scheduler_lock:
-                    # refresh resource details for latest model state
-                    new_details: Optional[ResourceDetails] = self._state.resources.get(task.resource, None)
-                    if new_details is not None and new_details.attribute_hash == resource_details.attribute_hash:
-                        # FIXME[#8010]: pass success/failure to notify_provides()
-                        # FIXME[#8008]: iff deploy was successful set resource status and deployment result
-                        #               in self.state.resources
-                        self._work.notify_provides(task)
-                    # The deploy that finished has become stale (state has changed since the deploy started).
-                    # Nothing to report on a stale deploy.
-                    # A new deploy for the current model state will have been queued already.
-            case _:
-                # nothing to do
-                pass
-        self._work.agent_queues.task_done(agent)
-
     async def _run_for_agent(self, agent: str) -> None:
         """Main loop for one agent"""
         while self._running:
-            await self._work_once(agent)
+            task: Task = await self._work.agent_queues.queue_get(agent)
+            try:
+                # FIXME[#8008]: skip and reschedule deploy / refresh-fact task if resource marked as update pending?
+                await task.execute(self, agent)
+            except Exception:
+                LOGGER.exception("Task %s for agent %s has failed and the exception was not properly handled", task, agent)
+
+            self._work.agent_queues.task_done(agent)
diff --git a/src/inmanta/deploy/state.py b/src/inmanta/deploy/state.py
@@ -95,6 +95,9 @@ class ResourceState:
     status: ResourceStatus
     deployment_result: DeploymentResult
 
+    def needs_deploy(self) -> bool:
+        return self.status == ResourceStatus.HAS_UPDATE or self.deployment_result != DeploymentResult.DEPLOYED
+
 
 @dataclass(kw_only=True)
 class ModelState: