From fca049d7ad356bee874874d3756f8280f171942b Mon Sep 17 00:00:00 2001 From: Max Marrone Date: Mon, 8 Jul 2024 11:41:03 -0400 Subject: [PATCH] feat(api): Enable error recovery without the feature flag (#15588) --- api/src/opentrons/config/advanced_settings.py | 5 +- api/src/opentrons/execute.py | 3 + .../protocol_engine/create_protocol_engine.py | 14 ++++- .../protocol_engine/error_recovery_policy.py | 60 ++++++++++++------- .../execution/command_executor.py | 7 ++- .../protocol_engine/protocol_engine.py | 7 +-- .../create_simulating_orchestrator.py | 2 + api/src/opentrons/simulate.py | 3 + api/tests/opentrons/conftest.py | 7 ++- .../execution/test_command_executor.py | 7 ++- .../test_create_protocol_engine.py | 4 ++ .../protocol_engine/test_protocol_engine.py | 11 +++- .../maintenance_run_orchestrator_store.py | 2 + .../runs/run_orchestrator_store.py | 3 + 14 files changed, 97 insertions(+), 38 deletions(-) diff --git a/api/src/opentrons/config/advanced_settings.py b/api/src/opentrons/config/advanced_settings.py index f65b5824eb1..f1d95d76c7e 100644 --- a/api/src/opentrons/config/advanced_settings.py +++ b/api/src/opentrons/config/advanced_settings.py @@ -211,11 +211,8 @@ class Setting(NamedTuple): title="Enable error recovery experiments", description=( "Do not enable." - " This is an Opentrons internal setting to experiment with" + " This is an Opentrons internal setting to enable additional," " in-development error recovery features." - " This will interfere with your protocol runs," - " corrupt your robot's storage," - " bring misfortune and pestilence upon you and your livestock, etc." ), robot_type=[RobotTypeEnum.FLEX], internal_only=True, diff --git a/api/src/opentrons/execute.py b/api/src/opentrons/execute.py index 1c9343c0461..7bd57e02e6b 100644 --- a/api/src/opentrons/execute.py +++ b/api/src/opentrons/execute.py @@ -58,6 +58,7 @@ Config, DeckType, EngineStatus, + error_recovery_policy, ) from opentrons.protocol_engine.create_protocol_engine import ( create_protocol_engine_in_thread, @@ -545,6 +546,7 @@ def _create_live_context_pe( create_protocol_engine_in_thread( hardware_api=hardware_api_wrapped, config=_get_protocol_engine_config(), + error_recovery_policy=error_recovery_policy.never_recover, drop_tips_after_run=False, post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE, load_fixed_trash=should_load_fixed_trash_labware_for_python_protocol( @@ -628,6 +630,7 @@ async def run(protocol_source: ProtocolSource) -> None: protocol_engine = await create_protocol_engine( hardware_api=hardware_api_wrapped, config=_get_protocol_engine_config(), + error_recovery_policy=error_recovery_policy.never_recover, load_fixed_trash=should_load_fixed_trash(protocol_source.config), ) diff --git a/api/src/opentrons/protocol_engine/create_protocol_engine.py b/api/src/opentrons/protocol_engine/create_protocol_engine.py index 7da34a4e004..fd7b1b8bd5f 100644 --- a/api/src/opentrons/protocol_engine/create_protocol_engine.py +++ b/api/src/opentrons/protocol_engine/create_protocol_engine.py @@ -5,6 +5,7 @@ from opentrons.hardware_control import HardwareControlAPI from opentrons.hardware_control.types import DoorState +from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy from opentrons.util.async_helpers import async_context_manager_in_thread from .protocol_engine import ProtocolEngine @@ -12,15 +13,15 @@ from .state import Config, StateStore from .types import PostRunHardwareState, DeckConfigurationType - -# TODO(mm, 2023-06-16): Arguably, this not being a context manager makes us prone to forgetting to -# clean it up properly, especially in tests. See e.g. https://opentrons.atlassian.net/browse/RSS-222 from .engine_support import create_run_orchestrator +# TODO(mm, 2023-06-16): Arguably, this not being a context manager makes us prone to forgetting to +# clean it up properly, especially in tests. See e.g. https://opentrons.atlassian.net/browse/RSS-222 async def create_protocol_engine( hardware_api: HardwareControlAPI, config: Config, + error_recovery_policy: ErrorRecoveryPolicy, load_fixed_trash: bool = False, deck_configuration: typing.Optional[DeckConfigurationType] = None, notify_publishers: typing.Optional[typing.Callable[[], None]] = None, @@ -30,6 +31,8 @@ async def create_protocol_engine( Arguments: hardware_api: Hardware control API to pass down to dependencies. config: ProtocolEngine configuration. + error_recovery_policy: The error recovery policy to create the engine with. + See documentation on `ErrorRecoveryPolicy`. load_fixed_trash: Automatically load fixed trash labware in engine. deck_configuration: The initial deck configuration the engine will be instantiated with. notify_publishers: Notifies robot server publishers of internal state change. @@ -56,6 +59,7 @@ async def create_protocol_engine( return ProtocolEngine( state_store=state_store, hardware_api=hardware_api, + error_recovery_policy=error_recovery_policy, ) @@ -63,6 +67,7 @@ async def create_protocol_engine( def create_protocol_engine_in_thread( hardware_api: HardwareControlAPI, config: Config, + error_recovery_policy: ErrorRecoveryPolicy, drop_tips_after_run: bool, post_run_hardware_state: PostRunHardwareState, load_fixed_trash: bool = False, @@ -90,6 +95,7 @@ def create_protocol_engine_in_thread( _protocol_engine( hardware_api, config, + error_recovery_policy, drop_tips_after_run, post_run_hardware_state, load_fixed_trash, @@ -105,6 +111,7 @@ def create_protocol_engine_in_thread( async def _protocol_engine( hardware_api: HardwareControlAPI, config: Config, + error_recovery_policy: ErrorRecoveryPolicy, drop_tips_after_run: bool, post_run_hardware_state: PostRunHardwareState, load_fixed_trash: bool = False, @@ -112,6 +119,7 @@ async def _protocol_engine( protocol_engine = await create_protocol_engine( hardware_api=hardware_api, config=config, + error_recovery_policy=error_recovery_policy, load_fixed_trash=load_fixed_trash, ) diff --git a/api/src/opentrons/protocol_engine/error_recovery_policy.py b/api/src/opentrons/protocol_engine/error_recovery_policy.py index d2f126537ca..f7468961131 100644 --- a/api/src/opentrons/protocol_engine/error_recovery_policy.py +++ b/api/src/opentrons/protocol_engine/error_recovery_policy.py @@ -1,13 +1,16 @@ # noqa: D100 +from __future__ import annotations + import enum -from typing import Optional, Protocol +from typing import Optional, Protocol, TYPE_CHECKING -from opentrons.config import feature_flags as ff -from opentrons.protocol_engine.commands import ( - Command, - CommandDefinedErrorData, -) +if TYPE_CHECKING: + from opentrons.protocol_engine.commands import ( + Command, + CommandDefinedErrorData, + ) + from opentrons.protocol_engine.state.config import Config class ErrorRecoveryType(enum.Enum): @@ -48,27 +51,44 @@ class ErrorRecoveryPolicy(Protocol): @staticmethod def __call__( # noqa: D102 - failed_command: Command, defined_error_data: Optional[CommandDefinedErrorData] + config: Config, + failed_command: Command, + defined_error_data: Optional[CommandDefinedErrorData], ) -> ErrorRecoveryType: ... -def error_recovery_by_ff( - failed_command: Command, defined_error_data: Optional[CommandDefinedErrorData] +# todo(mm, 2024-07-05): This "static" policy will need to somehow become dynamic for +# https://opentrons.atlassian.net/browse/EXEC-589. +def standard_run_policy( + config: Config, + failed_command: Command, + defined_error_data: Optional[CommandDefinedErrorData], ) -> ErrorRecoveryType: - """Use API feature flags to decide how to handle an error. - - This is just for development. This should be replaced by a proper config - system exposed through robot-server's HTTP API. - """ - # todo(mm, 2024-03-18): Do we need to do anything explicit here to disable - # error recovery on the OT-2? - error_is_defined = defined_error_data is not None + """An error recovery policy suitable for normal protocol runs via robot-server.""" + # Although error recovery can theoretically work on OT-2s, we haven't tested it, + # and it's generally scarier because the OT-2 has much less hardware feedback. + robot_is_flex = config.robot_type == "OT-3 Standard" # If the error is defined, we're taking that to mean that we should - # WAIT_FOR_RECOVERY. This is not necessarily the right production logic--we might + # WAIT_FOR_RECOVERY. This is not necessarily the right long-term logic--we might # want to FAIL_RUN on certain defined errors and WAIT_FOR_RECOVERY on certain - # undefined errors--but this is convenient for development. - if ff.enable_error_recovery_experiments() and error_is_defined: + # undefined errors--but this is convenient for now. + error_is_defined = defined_error_data is not None + if robot_is_flex and error_is_defined: return ErrorRecoveryType.WAIT_FOR_RECOVERY else: return ErrorRecoveryType.FAIL_RUN + + +def never_recover( + config: Config, + failed_command: Command, + defined_error_data: Optional[CommandDefinedErrorData], +) -> ErrorRecoveryType: + """An error recovery policy where error recovery is never attempted. + + This makes sense for things like the `opentrons_simulate` and `opentrons_execute` + CLIs. Those don't expose any way to bring the run out of recovery mode after it's + been entered, so we need to avoid entering recovery mode in the first place. + """ + return ErrorRecoveryType.FAIL_RUN diff --git a/api/src/opentrons/protocol_engine/execution/command_executor.py b/api/src/opentrons/protocol_engine/execution/command_executor.py index 40c2bf31a54..efcaf847d89 100644 --- a/api/src/opentrons/protocol_engine/execution/command_executor.py +++ b/api/src/opentrons/protocol_engine/execution/command_executor.py @@ -169,6 +169,7 @@ async def execute(self, command_id: str) -> None: failed_at=self._model_utils.get_timestamp(), notes=note_tracker.get_notes(), type=self._error_recovery_policy( + self._state_store.config, running_command, None, ), @@ -199,6 +200,10 @@ async def execute(self, command_id: str) -> None: error_id=result.public.id, failed_at=result.public.createdAt, notes=note_tracker.get_notes(), - type=self._error_recovery_policy(running_command, result), + type=self._error_recovery_policy( + self._state_store.config, + running_command, + result, + ), ) ) diff --git a/api/src/opentrons/protocol_engine/protocol_engine.py b/api/src/opentrons/protocol_engine/protocol_engine.py index ed3d78f63dd..1c62404da16 100644 --- a/api/src/opentrons/protocol_engine/protocol_engine.py +++ b/api/src/opentrons/protocol_engine/protocol_engine.py @@ -3,10 +3,7 @@ from logging import getLogger from typing import Dict, Optional, Union, AsyncGenerator, Callable from opentrons.protocol_engine.actions.actions import ResumeFromRecoveryAction -from opentrons.protocol_engine.error_recovery_policy import ( - ErrorRecoveryPolicy, - error_recovery_by_ff, -) +from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy from opentrons.protocols.models import LabwareDefinition from opentrons.hardware_control import HardwareControlAPI @@ -86,6 +83,7 @@ def __init__( self, hardware_api: HardwareControlAPI, state_store: StateStore, + error_recovery_policy: ErrorRecoveryPolicy, action_dispatcher: Optional[ActionDispatcher] = None, plugin_starter: Optional[PluginStarter] = None, queue_worker: Optional[QueueWorker] = None, @@ -93,7 +91,6 @@ def __init__( hardware_stopper: Optional[HardwareStopper] = None, door_watcher: Optional[DoorWatcher] = None, module_data_provider: Optional[ModuleDataProvider] = None, - error_recovery_policy: ErrorRecoveryPolicy = error_recovery_by_ff, ) -> None: """Initialize a ProtocolEngine instance. diff --git a/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py b/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py index f17cd2ebce7..b6e41f020e7 100644 --- a/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py +++ b/api/src/opentrons/protocol_runner/create_simulating_orchestrator.py @@ -6,6 +6,7 @@ from opentrons.protocol_engine import ( Config as ProtocolEngineConfig, DeckType, + error_recovery_policy, ) from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine from opentrons.protocol_reader.protocol_source import ProtocolConfig @@ -60,6 +61,7 @@ async def create_simulating_orchestrator( use_simulated_deck_config=True, use_virtual_pipettes=True, ), + error_recovery_policy=error_recovery_policy.never_recover, load_fixed_trash=should_load_fixed_trash(protocol_config), ) diff --git a/api/src/opentrons/simulate.py b/api/src/opentrons/simulate.py index ee1d391425d..5a253a770e9 100644 --- a/api/src/opentrons/simulate.py +++ b/api/src/opentrons/simulate.py @@ -45,6 +45,7 @@ create_protocol_engine_in_thread, create_protocol_engine, ) +from opentrons.protocol_engine import error_recovery_policy from opentrons.protocol_engine.state.config import Config from opentrons.protocol_engine.types import DeckType, EngineStatus, PostRunHardwareState from opentrons.protocol_reader.protocol_source import ProtocolSource @@ -806,6 +807,7 @@ def _create_live_context_pe( config=_get_protocol_engine_config( robot_type, virtual=use_virtual_hardware ), + error_recovery_policy=error_recovery_policy.never_recover, drop_tips_after_run=False, post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE, load_fixed_trash=should_load_fixed_trash_labware_for_python_protocol( @@ -910,6 +912,7 @@ async def run(protocol_source: ProtocolSource) -> _SimulateResult: protocol_engine = await create_protocol_engine( hardware_api=hardware_api_wrapped, config=_get_protocol_engine_config(robot_type, virtual=True), + error_recovery_policy=error_recovery_policy.never_recover, load_fixed_trash=should_load_fixed_trash(protocol_source.config), ) diff --git a/api/tests/opentrons/conftest.py b/api/tests/opentrons/conftest.py index e657068e331..f25cb781d28 100755 --- a/api/tests/opentrons/conftest.py +++ b/api/tests/opentrons/conftest.py @@ -64,7 +64,11 @@ from opentrons.protocol_engine.create_protocol_engine import ( create_protocol_engine_in_thread, ) -from opentrons.protocol_engine import Config as ProtocolEngineConfig, DeckType +from opentrons.protocol_engine import ( + Config as ProtocolEngineConfig, + DeckType, + error_recovery_policy, +) from opentrons.protocols.api_support import deck_type from opentrons.protocols.api_support.types import APIVersion from opentrons.protocols.api_support.definitions import MAX_SUPPORTED_VERSION @@ -312,6 +316,7 @@ def _make_ot3_pe_ctx( use_simulated_deck_config=True, block_on_door_open=False, ), + error_recovery_policy=error_recovery_policy.never_recover, drop_tips_after_run=False, post_run_hardware_state=PostRunHardwareState.STAY_ENGAGED_IN_PLACE, # TODO(jbl 10-30-2023) load_fixed_trash being hardcoded to True will be refactored once we need tests to have diff --git a/api/tests/opentrons/protocol_engine/execution/test_command_executor.py b/api/tests/opentrons/protocol_engine/execution/test_command_executor.py index 2bc4a65d859..b55bdc24c18 100644 --- a/api/tests/opentrons/protocol_engine/execution/test_command_executor.py +++ b/api/tests/opentrons/protocol_engine/execution/test_command_executor.py @@ -498,9 +498,9 @@ class _TestCommand( datetime(year=2023, month=3, day=3), ) - decoy.when(error_recovery_policy(matchers.Anything(), None)).then_return( - ErrorRecoveryType.WAIT_FOR_RECOVERY - ) + decoy.when( + error_recovery_policy(matchers.Anything(), matchers.Anything(), None) + ).then_return(ErrorRecoveryType.WAIT_FOR_RECOVERY) decoy.when(command_note_tracker.get_notes()).then_return(command_notes) @@ -636,6 +636,7 @@ class _TestCommand( decoy.when( error_recovery_policy( + matchers.Anything(), matchers.Anything(), returned_error, # type: ignore[arg-type] ) diff --git a/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py b/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py index e24521d4303..d3bfd4843bc 100644 --- a/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py +++ b/api/tests/opentrons/protocol_engine/test_create_protocol_engine.py @@ -14,6 +14,7 @@ ProtocolEngine, Config as EngineConfig, DeckType, + error_recovery_policy, ) from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine @@ -83,6 +84,7 @@ async def test_create_engine_initializes_state_with_no_fixed_trash( robot_type=robot_type, deck_type=deck_type, ), + error_recovery_policy=error_recovery_policy.never_recover, load_fixed_trash=False, ) state = engine.state_view @@ -140,6 +142,7 @@ async def test_create_engine_initializes_state_with_fixed_trash( robot_type=robot_type, deck_type=deck_type, ), + error_recovery_policy=error_recovery_policy.never_recover, load_fixed_trash=True, ) state = engine.state_view @@ -174,6 +177,7 @@ async def test_create_engine_initializes_state_with_door_state( robot_type="OT-2 Standard", deck_type=DeckType.OT2_SHORT_TRASH, ), + error_recovery_policy=error_recovery_policy.never_recover, ) state = engine.state_view assert state.commands.get_is_door_blocking() is True diff --git a/api/tests/opentrons/protocol_engine/test_protocol_engine.py b/api/tests/opentrons/protocol_engine/test_protocol_engine.py index f9b89797424..bcb95916501 100644 --- a/api/tests/opentrons/protocol_engine/test_protocol_engine.py +++ b/api/tests/opentrons/protocol_engine/test_protocol_engine.py @@ -8,7 +8,6 @@ from decoy import Decoy from opentrons_shared_data.robot.dev_types import RobotType -from opentrons.protocol_engine.actions.actions import ResumeFromRecoveryAction from opentrons.types import DeckSlotName from opentrons.hardware_control import HardwareControlAPI, OT2HardwareControlAPI @@ -33,6 +32,7 @@ PostRunHardwareState, AddressableAreaLocation, ) +from opentrons.protocol_engine.error_recovery_policy import ErrorRecoveryPolicy from opentrons.protocol_engine.execution import ( QueueWorker, HardwareStopper, @@ -53,6 +53,7 @@ PlayAction, PauseAction, PauseSource, + ResumeFromRecoveryAction, StopAction, FinishAction, FinishErrorDetails, @@ -116,6 +117,12 @@ def module_data_provider(decoy: Decoy) -> ModuleDataProvider: return decoy.mock(cls=ModuleDataProvider) +@pytest.fixture +def error_recovery_policy(decoy: Decoy) -> ErrorRecoveryPolicy: + """Get a mock ErrorRecoveryPolicy.""" + return decoy.mock(cls=ErrorRecoveryPolicy) + + @pytest.fixture(autouse=True) def _mock_slot_standardization_module( decoy: Decoy, monkeypatch: pytest.MonkeyPatch @@ -139,6 +146,7 @@ def _mock_hash_command_params_module( def subject( hardware_api: HardwareControlAPI, state_store: StateStore, + error_recovery_policy: ErrorRecoveryPolicy, action_dispatcher: ActionDispatcher, plugin_starter: PluginStarter, queue_worker: QueueWorker, @@ -151,6 +159,7 @@ def subject( return ProtocolEngine( hardware_api=hardware_api, state_store=state_store, + error_recovery_policy=error_recovery_policy, action_dispatcher=action_dispatcher, plugin_starter=plugin_starter, queue_worker=queue_worker, diff --git a/robot-server/robot_server/maintenance_runs/maintenance_run_orchestrator_store.py b/robot-server/robot_server/maintenance_runs/maintenance_run_orchestrator_store.py index ea27035d50a..e73002ce5ea 100644 --- a/robot-server/robot_server/maintenance_runs/maintenance_run_orchestrator_store.py +++ b/robot-server/robot_server/maintenance_runs/maintenance_run_orchestrator_store.py @@ -16,6 +16,7 @@ CommandCreate, LabwareOffset, Config as ProtocolEngineConfig, + error_recovery_policy, ) from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine from opentrons.protocol_runner import RunResult, RunOrchestrator @@ -174,6 +175,7 @@ async def create( RobotTypeEnum.robot_literal_to_enum(self._robot_type) ), ), + error_recovery_policy=error_recovery_policy.never_recover, deck_configuration=deck_configuration, notify_publishers=notify_publishers, ) diff --git a/robot-server/robot_server/runs/run_orchestrator_store.py b/robot-server/robot_server/runs/run_orchestrator_store.py index 4854c3e4aa2..de346a72968 100644 --- a/robot-server/robot_server/runs/run_orchestrator_store.py +++ b/robot-server/robot_server/runs/run_orchestrator_store.py @@ -34,6 +34,7 @@ CommandCreate, LabwareOffset, Config as ProtocolEngineConfig, + error_recovery_policy, ) from opentrons.protocol_engine.create_protocol_engine import create_protocol_engine @@ -170,6 +171,7 @@ async def get_default_orchestrator(self) -> RunOrchestrator: deck_type=self._deck_type, block_on_door_open=False, ), + error_recovery_policy=error_recovery_policy.never_recover, ) self._default_run_orchestrator = RunOrchestrator.build_orchestrator( protocol_engine=engine, hardware_api=self._hardware_api @@ -219,6 +221,7 @@ async def create( RobotTypeEnum.robot_literal_to_enum(self._robot_type) ), ), + error_recovery_policy=error_recovery_policy.standard_run_policy, load_fixed_trash=load_fixed_trash, deck_configuration=deck_configuration, notify_publishers=notify_publishers,