Skip to content

Commit

Permalink
feat(robot-server): HTTP API for "Ignore error and skip to next step" (
Browse files Browse the repository at this point in the history
  • Loading branch information
SyntaxColoring authored Oct 24, 2024
1 parent 8018920 commit eb710c0
Show file tree
Hide file tree
Showing 6 changed files with 94 additions and 22 deletions.
9 changes: 8 additions & 1 deletion api-client/src/runs/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -125,12 +125,15 @@ export const RUN_ACTION_TYPE_PAUSE: 'pause' = 'pause'
export const RUN_ACTION_TYPE_STOP: 'stop' = 'stop'
export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY: 'resume-from-recovery' =
'resume-from-recovery'
export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE: 'resume-from-recovery-assuming-false-positive' =
'resume-from-recovery-assuming-false-positive'

export type RunActionType =
| typeof RUN_ACTION_TYPE_PLAY
| typeof RUN_ACTION_TYPE_PAUSE
| typeof RUN_ACTION_TYPE_STOP
| typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY
| typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE

export interface RunAction {
id: string
Expand Down Expand Up @@ -172,7 +175,11 @@ export type RunError = RunCommandError
* Error Policy
*/

export type IfMatchType = 'ignoreAndContinue' | 'failRun' | 'waitForRecovery'
export type IfMatchType =
| 'assumeFalsePositiveAndContinue'
| 'ignoreAndContinue'
| 'failRun'
| 'waitForRecovery'

export interface ErrorRecoveryPolicy {
policyRules: Array<{
Expand Down
54 changes: 42 additions & 12 deletions robot-server/robot_server/runs/action_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,20 +7,53 @@


class RunActionType(str, Enum):
"""The type of the run control action.
* `"play"`: Start or resume a run.
* `"pause"`: Pause a run.
* `"stop"`: Stop (cancel) a run.
* `"resume-from-recovery"`: Resume normal protocol execution after a command failed,
the run was placed in `awaiting-recovery` mode, and manual recovery steps
were taken.
"""The type of the run control action, which determines behavior.
* `"play"`: Start the run, or resume it after it's been paused.
* `"pause"`: Pause the run.
* `"stop"`: Stop (cancel) the run.
* `"resume-from-recovery"`: Resume normal protocol execution after the run was in
error recovery mode. Continue from however the last command left the robot.
* `"resume-from-recovery-assuming-false-positive"`: Resume normal protocol execution
after the run was in error recovery mode. Act as if the underlying error was a
false positive.
To see the difference between `"resume-from-recovery"` and
`"resume-from-recovery-assuming-false-positive"`, suppose we've just entered error
recovery mode after a `commandType: "pickUpTip"` command failed with an
`errorType: "tipPhysicallyMissing"` error. That normally leaves the robot thinking
it has no tip attached. If you use `"resume-from-recovery"`, the robot will run
the next protocol command from that state, acting as if there's no tip attached.
(This may cause another error, if the next command needs a tip.)
Whereas if you use `"resume-from-recovery-assuming-false-positive"`,
the robot will try to nullify the error, thereby acting as if it *does* have a tip
attached.
Generally:
* If you've tried to recover from the error by sending your own `intent: "fixit"`
commands to `POST /runs/{id}/commands`, use `"resume-from-recovery"`. It's your
responsibility to ensure your `POST`ed commands leave the robot in a good-enough
state to continue with the protocol.
* Otherwise, use `"resume-from-recovery-assuming-false-positive"`.
Do not combine `intent: "fixit"` commands with
`"resume-from-recovery-assuming-false-positive"`—the robot's built-in
false-positive recovery may compete with your own.
"""

PLAY = "play"
PAUSE = "pause"
STOP = "stop"
RESUME_FROM_RECOVERY = "resume-from-recovery"
RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE = (
"resume-from-recovery-assuming-false-positive"
)


class RunActionCreate(BaseModel):
Expand All @@ -41,7 +74,4 @@ class RunAction(ResourceModel):

id: str = Field(..., description="A unique identifier to reference the command.")
createdAt: datetime = Field(..., description="When the command was created.")
actionType: RunActionType = Field(
...,
description="Specific type of action, which determines behavior.",
)
actionType: RunActionType
4 changes: 4 additions & 0 deletions robot-server/robot_server/runs/error_recovery_mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,10 @@ def _map_error_recovery_type(reaction_if_match: ReactionIfMatch) -> ErrorRecover
match reaction_if_match:
case ReactionIfMatch.IGNORE_AND_CONTINUE:
return ErrorRecoveryType.IGNORE_AND_CONTINUE
case ReactionIfMatch.ASSUME_FALSE_POSITIVE_AND_CONTINUE:
# todo(mm, 2024-10-23): Connect to work in
# https://github.com/Opentrons/opentrons/pull/16556.
return ErrorRecoveryType.IGNORE_AND_CONTINUE
case ReactionIfMatch.FAIL_RUN:
return ErrorRecoveryType.FAIL_RUN
case ReactionIfMatch.WAIT_FOR_RECOVERY:
Expand Down
36 changes: 28 additions & 8 deletions robot-server/robot_server/runs/error_recovery_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,17 +24,40 @@


class ReactionIfMatch(Enum):
"""How to handle a given error.
"""How to handle a matching error.
* `"ignoreAndContinue"`: Ignore this error and continue with the next command.
* `"failRun"`: Fail the run.
* `"waitForRecovery"`: Enter interactive error recovery mode.
* `"waitForRecovery"`: Enter interactive error recovery mode. You can then
perform error recovery with `POST /runs/{id}/commands` and exit error
recovery mode with `POST /runs/{id}/actions`.
* `"assumeFalsePositiveAndContinue"`: Continue the run without interruption, acting
as if the error was a false positive.
This is equivalent to doing `"waitForRecovery"`
and then sending `actionType: "resume-from-recovery-assuming-false-positive"`
to `POST /runs/{id}/actions`, except this requires no ongoing intervention from
the client.
* `"ignoreAndContinue"`: Continue the run without interruption, accepting whatever
state the error left the robot in.
This is equivalent to doing `"waitForRecovery"`
and then sending `actionType: "resume-from-recovery"` to `POST /runs/{id}/actions`,
except this requires no ongoing intervention from the client.
This is probably not useful very often because it's likely to cause downstream
errors—imagine trying an `aspirate` command after a failed `pickUpTip` command.
This is provided for symmetry.
"""

IGNORE_AND_CONTINUE = "ignoreAndContinue"
FAIL_RUN = "failRun"
WAIT_FOR_RECOVERY = "waitForRecovery"
ASSUME_FALSE_POSITIVE_AND_CONTINUE = "assumeFalsePositiveAndContinue"
# todo(mm, 2024-10-22): "ignoreAndContinue" may be a misnomer now: is
# "assumeFalsePositiveAndContinue" not also a way to "ignore"? Consider renaming.
IGNORE_AND_CONTINUE = "ignoreAndContinue"


class ErrorMatcher(BaseModel):
Expand Down Expand Up @@ -69,10 +92,7 @@ class ErrorRecoveryRule(BaseModel):
...,
description="The criteria that must be met for this rule to be applied.",
)
ifMatch: ReactionIfMatch = Field(
...,
description="How to handle errors matched by this rule.",
)
ifMatch: ReactionIfMatch


class ErrorRecoveryPolicy(BaseModel):
Expand Down
1 change: 0 additions & 1 deletion robot-server/robot_server/runs/router/base_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,6 @@ async def update_run(
See `PATCH /errorRecovery/settings`.
"""
),
status_code=status.HTTP_201_CREATED,
responses={
status.HTTP_200_OK: {"model": SimpleEmptyBody},
status.HTTP_409_CONFLICT: {"model": ErrorBody[RunStopped]},
Expand Down
12 changes: 12 additions & 0 deletions robot-server/robot_server/runs/run_controller.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import logging
from datetime import datetime
from typing import Optional
from typing_extensions import assert_never
from opentrons.protocol_engine import ProtocolEngineError
from opentrons_shared_data.errors.exceptions import RoboticsInteractionError

Expand Down Expand Up @@ -96,6 +97,17 @@ def create_action(
elif action_type == RunActionType.RESUME_FROM_RECOVERY:
self._run_orchestrator_store.resume_from_recovery()

elif (
action_type
== RunActionType.RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE
):
# todo(mm, 2024-10-23): Connect to work in
# https://github.com/Opentrons/opentrons/pull/16556.
self._run_orchestrator_store.resume_from_recovery()

else:
assert_never(action_type)

except ProtocolEngineError as e:
raise RunActionNotAllowedError(message=e.message, wrapping=[e]) from e

Expand Down

0 comments on commit eb710c0

Please sign in to comment.