feat(api): Enable error recovery without the feature flag (#15588)

Opentrons · Jul 8, 2024 · fca049d · fca049d
1 parent 4c5e3f9
commit fca049d
Showing 14 changed files with 97 additions and 38 deletions.
diff --git a/api/src/opentrons/config/advanced_settings.py b/api/src/opentrons/config/advanced_settings.py
@@ -211,11 +211,8 @@ class Setting(NamedTuple):
         title="Enable error recovery experiments",
         description=(
             "Do not enable."
-            " This is an Opentrons internal setting to experiment with"
+            " This is an Opentrons internal setting to enable additional,"
             " in-development error recovery features."
-            " This will interfere with your protocol runs,"
-            " corrupt your robot's storage,"
-            " bring misfortune and pestilence upon you and your livestock, etc."
         ),
         robot_type=[RobotTypeEnum.FLEX],
         internal_only=True,

diff --git a/api/src/opentrons/execute.py b/api/src/opentrons/execute.py
@@ -58,6 +58,7 @@
     Config,
     DeckType,
     EngineStatus,
+    error_recovery_policy,
 )
 from opentrons.protocol_engine.create_protocol_engine import (
     create_protocol_engine_in_thread,
@@ -545,6 +546,7 @@ def _create_live_context_pe(
         create_protocol_engine_in_thread(
             hardware_api=hardware_api_wrapped,
             config=_get_protocol_engine_config(),
+            error_recovery_policy=error_recovery_policy.never_recover,
             drop_tips_after_run=False,
             post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE,
             load_fixed_trash=should_load_fixed_trash_labware_for_python_protocol(
@@ -628,6 +630,7 @@ async def run(protocol_source: ProtocolSource) -> None:
         protocol_engine = await create_protocol_engine(
             hardware_api=hardware_api_wrapped,
             config=_get_protocol_engine_config(),
+            error_recovery_policy=error_recovery_policy.never_recover,
             load_fixed_trash=should_load_fixed_trash(protocol_source.config),
         )
 

diff --git a/api/src/opentrons/protocol_engine/create_protocol_engine.py b/api/src/opentrons/protocol_engine/create_protocol_engine.py
@@ -5,22 +5,23 @@
 
 from opentrons.hardware_control import HardwareControlAPI
 from opentrons.hardware_control.types import DoorState
+from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy
 from opentrons.util.async_helpers import async_context_manager_in_thread
 
 from .protocol_engine import ProtocolEngine
 from .resources import DeckDataProvider, ModuleDataProvider
 from .state import Config, StateStore
 from .types import PostRunHardwareState, DeckConfigurationType
 
-
-# TODO(mm, 2023-06-16): Arguably, this not being a context manager makes us prone to forgetting to
-# clean it up properly, especially in tests. See e.g. https://opentrons.atlassian.net/browse/RSS-222
 from .engine_support import create_run_orchestrator
 
 
+# TODO(mm, 2023-06-16): Arguably, this not being a context manager makes us prone to forgetting to
+# clean it up properly, especially in tests. See e.g. https://opentrons.atlassian.net/browse/RSS-222
 async def create_protocol_engine(
     hardware_api: HardwareControlAPI,
     config: Config,
+    error_recovery_policy: ErrorRecoveryPolicy,
     load_fixed_trash: bool = False,
     deck_configuration: typing.Optional[DeckConfigurationType] = None,
     notify_publishers: typing.Optional[typing.Callable[[], None]] = None,
@@ -30,6 +31,8 @@ async def create_protocol_engine(
     Arguments:
         hardware_api: Hardware control API to pass down to dependencies.
         config: ProtocolEngine configuration.
+        error_recovery_policy: The error recovery policy to create the engine with.
+            See documentation on `ErrorRecoveryPolicy`.
         load_fixed_trash: Automatically load fixed trash labware in engine.
         deck_configuration: The initial deck configuration the engine will be instantiated with.
         notify_publishers: Notifies robot server publishers of internal state change.
@@ -56,13 +59,15 @@ async def create_protocol_engine(
     return ProtocolEngine(
         state_store=state_store,
         hardware_api=hardware_api,
+        error_recovery_policy=error_recovery_policy,
     )
 
 
 @contextlib.contextmanager
 def create_protocol_engine_in_thread(
     hardware_api: HardwareControlAPI,
     config: Config,
+    error_recovery_policy: ErrorRecoveryPolicy,
     drop_tips_after_run: bool,
     post_run_hardware_state: PostRunHardwareState,
     load_fixed_trash: bool = False,
@@ -90,6 +95,7 @@ def create_protocol_engine_in_thread(
         _protocol_engine(
             hardware_api,
             config,
+            error_recovery_policy,
             drop_tips_after_run,
             post_run_hardware_state,
             load_fixed_trash,
@@ -105,13 +111,15 @@ def create_protocol_engine_in_thread(
 async def _protocol_engine(
     hardware_api: HardwareControlAPI,
     config: Config,
+    error_recovery_policy: ErrorRecoveryPolicy,
     drop_tips_after_run: bool,
     post_run_hardware_state: PostRunHardwareState,
     load_fixed_trash: bool = False,
 ) -> typing.AsyncGenerator[ProtocolEngine, None]:
     protocol_engine = await create_protocol_engine(
         hardware_api=hardware_api,
         config=config,
+        error_recovery_policy=error_recovery_policy,
         load_fixed_trash=load_fixed_trash,
     )
 

diff --git a/api/src/opentrons/protocol_engine/error_recovery_policy.py b/api/src/opentrons/protocol_engine/error_recovery_policy.py
@@ -1,13 +1,16 @@
 # noqa: D100
 
+from __future__ import annotations
+
 import enum
-from typing import Optional, Protocol
+from typing import Optional, Protocol, TYPE_CHECKING
 
-from opentrons.config import feature_flags as ff
-from opentrons.protocol_engine.commands import (
-    Command,
-    CommandDefinedErrorData,
-)
+if TYPE_CHECKING:
+    from opentrons.protocol_engine.commands import (
+        Command,
+        CommandDefinedErrorData,
+    )
+    from opentrons.protocol_engine.state.config import Config
 
 
 class ErrorRecoveryType(enum.Enum):
@@ -48,27 +51,44 @@ class ErrorRecoveryPolicy(Protocol):
 
     @staticmethod
     def __call__(  # noqa: D102
-        failed_command: Command, defined_error_data: Optional[CommandDefinedErrorData]
+        config: Config,
+        failed_command: Command,
+        defined_error_data: Optional[CommandDefinedErrorData],
     ) -> ErrorRecoveryType:
         ...
 
 
-def error_recovery_by_ff(
-    failed_command: Command, defined_error_data: Optional[CommandDefinedErrorData]
+# todo(mm, 2024-07-05): This "static" policy will need to somehow become dynamic for
+# https://opentrons.atlassian.net/browse/EXEC-589.
+def standard_run_policy(
+    config: Config,
+    failed_command: Command,
+    defined_error_data: Optional[CommandDefinedErrorData],
 ) -> ErrorRecoveryType:
-    """Use API feature flags to decide how to handle an error.
-
-    This is just for development. This should be replaced by a proper config
-    system exposed through robot-server's HTTP API.
-    """
-    # todo(mm, 2024-03-18): Do we need to do anything explicit here to disable
-    # error recovery on the OT-2?
-    error_is_defined = defined_error_data is not None
+    """An error recovery policy suitable for normal protocol runs via robot-server."""
+    # Although error recovery can theoretically work on OT-2s, we haven't tested it,
+    # and it's generally scarier because the OT-2 has much less hardware feedback.
+    robot_is_flex = config.robot_type == "OT-3 Standard"
     # If the error is defined, we're taking that to mean that we should
-    # WAIT_FOR_RECOVERY. This is not necessarily the right production logic--we might
+    # WAIT_FOR_RECOVERY. This is not necessarily the right long-term logic--we might
     # want to FAIL_RUN on certain defined errors and WAIT_FOR_RECOVERY on certain
-    # undefined errors--but this is convenient for development.
-    if ff.enable_error_recovery_experiments() and error_is_defined:
+    # undefined errors--but this is convenient for now.
+    error_is_defined = defined_error_data is not None
+    if robot_is_flex and error_is_defined:
         return ErrorRecoveryType.WAIT_FOR_RECOVERY
     else:
         return ErrorRecoveryType.FAIL_RUN
+
+
+def never_recover(
+    config: Config,
+    failed_command: Command,
+    defined_error_data: Optional[CommandDefinedErrorData],
+) -> ErrorRecoveryType:
+    """An error recovery policy where error recovery is never attempted.
+
+    This makes sense for things like the `opentrons_simulate` and `opentrons_execute`
+    CLIs. Those don't expose any way to bring the run out of recovery mode after it's
+    been entered, so we need to avoid entering recovery mode in the first place.
+    """
+    return ErrorRecoveryType.FAIL_RUN
diff --git a/api/src/opentrons/protocol_engine/execution/command_executor.py b/api/src/opentrons/protocol_engine/execution/command_executor.py
@@ -169,6 +169,7 @@ async def execute(self, command_id: str) -> None:
                     failed_at=self._model_utils.get_timestamp(),
                     notes=note_tracker.get_notes(),
                     type=self._error_recovery_policy(
+                        self._state_store.config,
                         running_command,
                         None,
                     ),
@@ -199,6 +200,10 @@ async def execute(self, command_id: str) -> None:
                         error_id=result.public.id,
                         failed_at=result.public.createdAt,
                         notes=note_tracker.get_notes(),
-                        type=self._error_recovery_policy(running_command, result),
+                        type=self._error_recovery_policy(
+                            self._state_store.config,
+                            running_command,
+                            result,
+                        ),
                     )
                 )
diff --git a/api/src/opentrons/protocol_engine/protocol_engine.py b/api/src/opentrons/protocol_engine/protocol_engine.py
@@ -3,10 +3,7 @@
 from logging import getLogger
 from typing import Dict, Optional, Union, AsyncGenerator, Callable
 from opentrons.protocol_engine.actions.actions import ResumeFromRecoveryAction
-from opentrons.protocol_engine.error_recovery_policy import (
-    ErrorRecoveryPolicy,
-    error_recovery_by_ff,
-)
+from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy
 
 from opentrons.protocols.models import LabwareDefinition
 from opentrons.hardware_control import HardwareControlAPI
@@ -86,14 +83,14 @@ def __init__(
         self,
         hardware_api: HardwareControlAPI,
         state_store: StateStore,
+        error_recovery_policy: ErrorRecoveryPolicy,
         action_dispatcher: Optional[ActionDispatcher] = None,
         plugin_starter: Optional[PluginStarter] = None,
         queue_worker: Optional[QueueWorker] = None,
         model_utils: Optional[ModelUtils] = None,
         hardware_stopper: Optional[HardwareStopper] = None,
         door_watcher: Optional[DoorWatcher] = None,
         module_data_provider: Optional[ModuleDataProvider] = None,
-        error_recovery_policy: ErrorRecoveryPolicy = error_recovery_by_ff,
     ) -> None:
         """Initialize a ProtocolEngine instance.
 

diff --git a/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py b/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py
@@ -6,6 +6,7 @@
 from opentrons.protocol_engine import (
     Config as ProtocolEngineConfig,
     DeckType,
+    error_recovery_policy,
 )
 from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine
 from opentrons.protocol_reader.protocol_source import ProtocolConfig
@@ -60,6 +61,7 @@ async def create_simulating_orchestrator(
             use_simulated_deck_config=True,
             use_virtual_pipettes=True,
         ),
+        error_recovery_policy=error_recovery_policy.never_recover,
         load_fixed_trash=should_load_fixed_trash(protocol_config),
     )
 

diff --git a/api/src/opentrons/simulate.py b/api/src/opentrons/simulate.py
@@ -45,6 +45,7 @@
     create_protocol_engine_in_thread,
     create_protocol_engine,
 )
+from opentrons.protocol_engine import error_recovery_policy
 from opentrons.protocol_engine.state.config import Config
 from opentrons.protocol_engine.types import DeckType, EngineStatus, PostRunHardwareState
 from opentrons.protocol_reader.protocol_source import ProtocolSource
@@ -806,6 +807,7 @@ def _create_live_context_pe(
             config=_get_protocol_engine_config(
                 robot_type, virtual=use_virtual_hardware
             ),
+            error_recovery_policy=error_recovery_policy.never_recover,
             drop_tips_after_run=False,
             post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE,
             load_fixed_trash=should_load_fixed_trash_labware_for_python_protocol(
@@ -910,6 +912,7 @@ async def run(protocol_source: ProtocolSource) -> _SimulateResult:
         protocol_engine = await create_protocol_engine(
             hardware_api=hardware_api_wrapped,
             config=_get_protocol_engine_config(robot_type, virtual=True),
+            error_recovery_policy=error_recovery_policy.never_recover,
             load_fixed_trash=should_load_fixed_trash(protocol_source.config),
         )
 

diff --git a/api/tests/opentrons/conftest.py b/api/tests/opentrons/conftest.py
@@ -64,7 +64,11 @@
 from opentrons.protocol_engine.create_protocol_engine import (
     create_protocol_engine_in_thread,
 )
-from opentrons.protocol_engine import Config as ProtocolEngineConfig, DeckType
+from opentrons.protocol_engine import (
+    Config as ProtocolEngineConfig,
+    DeckType,
+    error_recovery_policy,
+)
 from opentrons.protocols.api_support import deck_type
 from opentrons.protocols.api_support.types import APIVersion
 from opentrons.protocols.api_support.definitions import MAX_SUPPORTED_VERSION
@@ -312,6 +316,7 @@ def _make_ot3_pe_ctx(
             use_simulated_deck_config=True,
             block_on_door_open=False,
         ),
+        error_recovery_policy=error_recovery_policy.never_recover,
         drop_tips_after_run=False,
         post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE,
         # TODO(jbl 10-30-2023) load_fixed_trash being hardcoded to True will be refactored once we need tests to have

diff --git a/api/tests/opentrons/protocol_engine/execution/test_command_executor.py b/api/tests/opentrons/protocol_engine/execution/test_command_executor.py
@@ -498,9 +498,9 @@ class _TestCommand(
         datetime(year=2023, month=3, day=3),
     )
 
-    decoy.when(error_recovery_policy(matchers.Anything(), None)).then_return(
-        ErrorRecoveryType.WAIT_FOR_RECOVERY
-    )
+    decoy.when(
+        error_recovery_policy(matchers.Anything(), matchers.Anything(), None)
+    ).then_return(ErrorRecoveryType.WAIT_FOR_RECOVERY)
 
     decoy.when(command_note_tracker.get_notes()).then_return(command_notes)
 
@@ -636,6 +636,7 @@ class _TestCommand(
 
     decoy.when(
         error_recovery_policy(
+            matchers.Anything(),
             matchers.Anything(),
             returned_error,  # type: ignore[arg-type]
         )

diff --git a/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py b/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py
@@ -14,6 +14,7 @@
     ProtocolEngine,
     Config as EngineConfig,
     DeckType,
+    error_recovery_policy,
 )
 from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine
 
@@ -83,6 +84,7 @@ async def test_create_engine_initializes_state_with_no_fixed_trash(
             robot_type=robot_type,
             deck_type=deck_type,
         ),
+        error_recovery_policy=error_recovery_policy.never_recover,
         load_fixed_trash=False,
     )
     state = engine.state_view
@@ -140,6 +142,7 @@ async def test_create_engine_initializes_state_with_fixed_trash(
             robot_type=robot_type,
             deck_type=deck_type,
         ),
+        error_recovery_policy=error_recovery_policy.never_recover,
         load_fixed_trash=True,
     )
     state = engine.state_view
@@ -174,6 +177,7 @@ async def test_create_engine_initializes_state_with_door_state(
             robot_type="OT-2 Standard",
             deck_type=DeckType.OT2_SHORT_TRASH,
         ),
+        error_recovery_policy=error_recovery_policy.never_recover,
     )
     state = engine.state_view
     assert state.commands.get_is_door_blocking() is True