From eb710c036b9576fabee093765f72c40681719976 Mon Sep 17 00:00:00 2001 From: Max Marrone Date: Thu, 24 Oct 2024 10:45:17 -0400 Subject: [PATCH] feat(robot-server): HTTP API for "Ignore error and skip to next step" (#16564) --- api-client/src/runs/types.ts | 9 +++- .../robot_server/runs/action_models.py | 54 ++++++++++++++----- .../runs/error_recovery_mapping.py | 4 ++ .../runs/error_recovery_models.py | 36 ++++++++++--- .../robot_server/runs/router/base_router.py | 1 - .../robot_server/runs/run_controller.py | 12 +++++ 6 files changed, 94 insertions(+), 22 deletions(-) diff --git a/api-client/src/runs/types.ts b/api-client/src/runs/types.ts index 2c467b4de4f..9a34c2b4978 100644 --- a/api-client/src/runs/types.ts +++ b/api-client/src/runs/types.ts @@ -125,12 +125,15 @@ export const RUN_ACTION_TYPE_PAUSE: 'pause' = 'pause' export const RUN_ACTION_TYPE_STOP: 'stop' = 'stop' export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY: 'resume-from-recovery' = 'resume-from-recovery' +export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE: 'resume-from-recovery-assuming-false-positive' = + 'resume-from-recovery-assuming-false-positive' export type RunActionType = | typeof RUN_ACTION_TYPE_PLAY | typeof RUN_ACTION_TYPE_PAUSE | typeof RUN_ACTION_TYPE_STOP | typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY + | typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE export interface RunAction { id: string @@ -172,7 +175,11 @@ export type RunError = RunCommandError * Error Policy */ -export type IfMatchType = 'ignoreAndContinue' | 'failRun' | 'waitForRecovery' +export type IfMatchType = + | 'assumeFalsePositiveAndContinue' + | 'ignoreAndContinue' + | 'failRun' + | 'waitForRecovery' export interface ErrorRecoveryPolicy { policyRules: Array<{ diff --git a/robot-server/robot_server/runs/action_models.py b/robot-server/robot_server/runs/action_models.py index ede27d823c6..c640fb33cae 100644 --- a/robot-server/robot_server/runs/action_models.py +++ b/robot-server/robot_server/runs/action_models.py @@ -7,20 +7,53 @@ class RunActionType(str, Enum): - """The type of the run control action. - - * `"play"`: Start or resume a run. - * `"pause"`: Pause a run. - * `"stop"`: Stop (cancel) a run. - * `"resume-from-recovery"`: Resume normal protocol execution after a command failed, - the run was placed in `awaiting-recovery` mode, and manual recovery steps - were taken. + """The type of the run control action, which determines behavior. + + * `"play"`: Start the run, or resume it after it's been paused. + + * `"pause"`: Pause the run. + + * `"stop"`: Stop (cancel) the run. + + * `"resume-from-recovery"`: Resume normal protocol execution after the run was in + error recovery mode. Continue from however the last command left the robot. + + * `"resume-from-recovery-assuming-false-positive"`: Resume normal protocol execution + after the run was in error recovery mode. Act as if the underlying error was a + false positive. + + To see the difference between `"resume-from-recovery"` and + `"resume-from-recovery-assuming-false-positive"`, suppose we've just entered error + recovery mode after a `commandType: "pickUpTip"` command failed with an + `errorType: "tipPhysicallyMissing"` error. That normally leaves the robot thinking + it has no tip attached. If you use `"resume-from-recovery"`, the robot will run + the next protocol command from that state, acting as if there's no tip attached. + (This may cause another error, if the next command needs a tip.) + Whereas if you use `"resume-from-recovery-assuming-false-positive"`, + the robot will try to nullify the error, thereby acting as if it *does* have a tip + attached. + + Generally: + + * If you've tried to recover from the error by sending your own `intent: "fixit"` + commands to `POST /runs/{id}/commands`, use `"resume-from-recovery"`. It's your + responsibility to ensure your `POST`ed commands leave the robot in a good-enough + state to continue with the protocol. + + * Otherwise, use `"resume-from-recovery-assuming-false-positive"`. + + Do not combine `intent: "fixit"` commands with + `"resume-from-recovery-assuming-false-positive"`—the robot's built-in + false-positive recovery may compete with your own. """ PLAY = "play" PAUSE = "pause" STOP = "stop" RESUME_FROM_RECOVERY = "resume-from-recovery" + RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE = ( + "resume-from-recovery-assuming-false-positive" + ) class RunActionCreate(BaseModel): @@ -41,7 +74,4 @@ class RunAction(ResourceModel): id: str = Field(..., description="A unique identifier to reference the command.") createdAt: datetime = Field(..., description="When the command was created.") - actionType: RunActionType = Field( - ..., - description="Specific type of action, which determines behavior.", - ) + actionType: RunActionType diff --git a/robot-server/robot_server/runs/error_recovery_mapping.py b/robot-server/robot_server/runs/error_recovery_mapping.py index d29ebf4b054..b548394cd8a 100644 --- a/robot-server/robot_server/runs/error_recovery_mapping.py +++ b/robot-server/robot_server/runs/error_recovery_mapping.py @@ -102,6 +102,10 @@ def _map_error_recovery_type(reaction_if_match: ReactionIfMatch) -> ErrorRecover match reaction_if_match: case ReactionIfMatch.IGNORE_AND_CONTINUE: return ErrorRecoveryType.IGNORE_AND_CONTINUE + case ReactionIfMatch.ASSUME_FALSE_POSITIVE_AND_CONTINUE: + # todo(mm, 2024-10-23): Connect to work in + # https://github.com/Opentrons/opentrons/pull/16556. + return ErrorRecoveryType.IGNORE_AND_CONTINUE case ReactionIfMatch.FAIL_RUN: return ErrorRecoveryType.FAIL_RUN case ReactionIfMatch.WAIT_FOR_RECOVERY: diff --git a/robot-server/robot_server/runs/error_recovery_models.py b/robot-server/robot_server/runs/error_recovery_models.py index a2990a007cb..1e2d4ac45aa 100644 --- a/robot-server/robot_server/runs/error_recovery_models.py +++ b/robot-server/robot_server/runs/error_recovery_models.py @@ -24,17 +24,40 @@ class ReactionIfMatch(Enum): - """How to handle a given error. + """How to handle a matching error. - * `"ignoreAndContinue"`: Ignore this error and continue with the next command. * `"failRun"`: Fail the run. - * `"waitForRecovery"`: Enter interactive error recovery mode. + * `"waitForRecovery"`: Enter interactive error recovery mode. You can then + perform error recovery with `POST /runs/{id}/commands` and exit error + recovery mode with `POST /runs/{id}/actions`. + + * `"assumeFalsePositiveAndContinue"`: Continue the run without interruption, acting + as if the error was a false positive. + + This is equivalent to doing `"waitForRecovery"` + and then sending `actionType: "resume-from-recovery-assuming-false-positive"` + to `POST /runs/{id}/actions`, except this requires no ongoing intervention from + the client. + + * `"ignoreAndContinue"`: Continue the run without interruption, accepting whatever + state the error left the robot in. + + This is equivalent to doing `"waitForRecovery"` + and then sending `actionType: "resume-from-recovery"` to `POST /runs/{id}/actions`, + except this requires no ongoing intervention from the client. + + This is probably not useful very often because it's likely to cause downstream + errors—imagine trying an `aspirate` command after a failed `pickUpTip` command. + This is provided for symmetry. """ - IGNORE_AND_CONTINUE = "ignoreAndContinue" FAIL_RUN = "failRun" WAIT_FOR_RECOVERY = "waitForRecovery" + ASSUME_FALSE_POSITIVE_AND_CONTINUE = "assumeFalsePositiveAndContinue" + # todo(mm, 2024-10-22): "ignoreAndContinue" may be a misnomer now: is + # "assumeFalsePositiveAndContinue" not also a way to "ignore"? Consider renaming. + IGNORE_AND_CONTINUE = "ignoreAndContinue" class ErrorMatcher(BaseModel): @@ -69,10 +92,7 @@ class ErrorRecoveryRule(BaseModel): ..., description="The criteria that must be met for this rule to be applied.", ) - ifMatch: ReactionIfMatch = Field( - ..., - description="How to handle errors matched by this rule.", - ) + ifMatch: ReactionIfMatch class ErrorRecoveryPolicy(BaseModel): diff --git a/robot-server/robot_server/runs/router/base_router.py b/robot-server/robot_server/runs/router/base_router.py index 788ca44aa1c..c108fa60a74 100644 --- a/robot-server/robot_server/runs/router/base_router.py +++ b/robot-server/robot_server/runs/router/base_router.py @@ -442,7 +442,6 @@ async def update_run( See `PATCH /errorRecovery/settings`. """ ), - status_code=status.HTTP_201_CREATED, responses={ status.HTTP_200_OK: {"model": SimpleEmptyBody}, status.HTTP_409_CONFLICT: {"model": ErrorBody[RunStopped]}, diff --git a/robot-server/robot_server/runs/run_controller.py b/robot-server/robot_server/runs/run_controller.py index 903bf22f252..1619cd20a08 100644 --- a/robot-server/robot_server/runs/run_controller.py +++ b/robot-server/robot_server/runs/run_controller.py @@ -2,6 +2,7 @@ import logging from datetime import datetime from typing import Optional +from typing_extensions import assert_never from opentrons.protocol_engine import ProtocolEngineError from opentrons_shared_data.errors.exceptions import RoboticsInteractionError @@ -96,6 +97,17 @@ def create_action( elif action_type == RunActionType.RESUME_FROM_RECOVERY: self._run_orchestrator_store.resume_from_recovery() + elif ( + action_type + == RunActionType.RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE + ): + # todo(mm, 2024-10-23): Connect to work in + # https://github.com/Opentrons/opentrons/pull/16556. + self._run_orchestrator_store.resume_from_recovery() + + else: + assert_never(action_type) + except ProtocolEngineError as e: raise RunActionNotAllowedError(message=e.message, wrapping=[e]) from e