From eb710c036b9576fabee093765f72c40681719976 Mon Sep 17 00:00:00 2001
From: Max Marrone <max@opentrons.com>
Date: Thu, 24 Oct 2024 10:45:17 -0400
Subject: [PATCH] feat(robot-server): HTTP API for "Ignore error and skip to
 next step" (#16564)

---
 api-client/src/runs/types.ts                  |  9 +++-
 .../robot_server/runs/action_models.py        | 54 ++++++++++++++-----
 .../runs/error_recovery_mapping.py            |  4 ++
 .../runs/error_recovery_models.py             | 36 ++++++++++---
 .../robot_server/runs/router/base_router.py   |  1 -
 .../robot_server/runs/run_controller.py       | 12 +++++
 6 files changed, 94 insertions(+), 22 deletions(-)

diff --git a/api-client/src/runs/types.ts b/api-client/src/runs/types.ts
index 2c467b4de4f..9a34c2b4978 100644
--- a/api-client/src/runs/types.ts
+++ b/api-client/src/runs/types.ts
@@ -125,12 +125,15 @@ export const RUN_ACTION_TYPE_PAUSE: 'pause' = 'pause'
 export const RUN_ACTION_TYPE_STOP: 'stop' = 'stop'
 export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY: 'resume-from-recovery' =
   'resume-from-recovery'
+export const RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE: 'resume-from-recovery-assuming-false-positive' =
+  'resume-from-recovery-assuming-false-positive'
 
 export type RunActionType =
   | typeof RUN_ACTION_TYPE_PLAY
   | typeof RUN_ACTION_TYPE_PAUSE
   | typeof RUN_ACTION_TYPE_STOP
   | typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY
+  | typeof RUN_ACTION_TYPE_RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE
 
 export interface RunAction {
   id: string
@@ -172,7 +175,11 @@ export type RunError = RunCommandError
  * Error Policy
  */
 
-export type IfMatchType = 'ignoreAndContinue' | 'failRun' | 'waitForRecovery'
+export type IfMatchType =
+  | 'assumeFalsePositiveAndContinue'
+  | 'ignoreAndContinue'
+  | 'failRun'
+  | 'waitForRecovery'
 
 export interface ErrorRecoveryPolicy {
   policyRules: Array<{
diff --git a/robot-server/robot_server/runs/action_models.py b/robot-server/robot_server/runs/action_models.py
index ede27d823c6..c640fb33cae 100644
--- a/robot-server/robot_server/runs/action_models.py
+++ b/robot-server/robot_server/runs/action_models.py
@@ -7,20 +7,53 @@
 
 
 class RunActionType(str, Enum):
-    """The type of the run control action.
-
-    * `"play"`: Start or resume a run.
-    * `"pause"`: Pause a run.
-    * `"stop"`: Stop (cancel) a run.
-    * `"resume-from-recovery"`: Resume normal protocol execution after a command failed,
-      the run was placed in `awaiting-recovery` mode, and manual recovery steps
-      were taken.
+    """The type of the run control action, which determines behavior.
+
+    * `"play"`: Start the run, or resume it after it's been paused.
+
+    * `"pause"`: Pause the run.
+
+    * `"stop"`: Stop (cancel) the run.
+
+    * `"resume-from-recovery"`: Resume normal protocol execution after the run was in
+      error recovery mode. Continue from however the last command left the robot.
+
+    * `"resume-from-recovery-assuming-false-positive"`: Resume normal protocol execution
+      after the run was in error recovery mode. Act as if the underlying error was a
+      false positive.
+
+    To see the difference between `"resume-from-recovery"` and
+    `"resume-from-recovery-assuming-false-positive"`, suppose we've just entered error
+    recovery mode after a `commandType: "pickUpTip"` command failed with an
+    `errorType: "tipPhysicallyMissing"` error. That normally leaves the robot thinking
+    it has no tip attached. If you use `"resume-from-recovery"`, the robot will run
+    the next protocol command from that state, acting as if there's no tip attached.
+    (This may cause another error, if the next command needs a tip.)
+    Whereas if you use `"resume-from-recovery-assuming-false-positive"`,
+    the robot will try to nullify the error, thereby acting as if it *does* have a tip
+    attached.
+
+    Generally:
+
+    * If you've tried to recover from the error by sending your own `intent: "fixit"`
+      commands to `POST /runs/{id}/commands`, use `"resume-from-recovery"`. It's your
+      responsibility to ensure your `POST`ed commands leave the robot in a good-enough
+      state to continue with the protocol.
+
+    * Otherwise, use `"resume-from-recovery-assuming-false-positive"`.
+
+    Do not combine `intent: "fixit"` commands with
+    `"resume-from-recovery-assuming-false-positive"`—the robot's built-in
+    false-positive recovery may compete with your own.
     """
 
     PLAY = "play"
     PAUSE = "pause"
     STOP = "stop"
     RESUME_FROM_RECOVERY = "resume-from-recovery"
+    RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE = (
+        "resume-from-recovery-assuming-false-positive"
+    )
 
 
 class RunActionCreate(BaseModel):
@@ -41,7 +74,4 @@ class RunAction(ResourceModel):
 
     id: str = Field(..., description="A unique identifier to reference the command.")
     createdAt: datetime = Field(..., description="When the command was created.")
-    actionType: RunActionType = Field(
-        ...,
-        description="Specific type of action, which determines behavior.",
-    )
+    actionType: RunActionType
diff --git a/robot-server/robot_server/runs/error_recovery_mapping.py b/robot-server/robot_server/runs/error_recovery_mapping.py
index d29ebf4b054..b548394cd8a 100644
--- a/robot-server/robot_server/runs/error_recovery_mapping.py
+++ b/robot-server/robot_server/runs/error_recovery_mapping.py
@@ -102,6 +102,10 @@ def _map_error_recovery_type(reaction_if_match: ReactionIfMatch) -> ErrorRecover
     match reaction_if_match:
         case ReactionIfMatch.IGNORE_AND_CONTINUE:
             return ErrorRecoveryType.IGNORE_AND_CONTINUE
+        case ReactionIfMatch.ASSUME_FALSE_POSITIVE_AND_CONTINUE:
+            # todo(mm, 2024-10-23): Connect to work in
+            # https://github.com/Opentrons/opentrons/pull/16556.
+            return ErrorRecoveryType.IGNORE_AND_CONTINUE
         case ReactionIfMatch.FAIL_RUN:
             return ErrorRecoveryType.FAIL_RUN
         case ReactionIfMatch.WAIT_FOR_RECOVERY:
diff --git a/robot-server/robot_server/runs/error_recovery_models.py b/robot-server/robot_server/runs/error_recovery_models.py
index a2990a007cb..1e2d4ac45aa 100644
--- a/robot-server/robot_server/runs/error_recovery_models.py
+++ b/robot-server/robot_server/runs/error_recovery_models.py
@@ -24,17 +24,40 @@
 
 
 class ReactionIfMatch(Enum):
-    """How to handle a given error.
+    """How to handle a matching error.
 
-    * `"ignoreAndContinue"`: Ignore this error and continue with the next command.
     * `"failRun"`: Fail the run.
-    * `"waitForRecovery"`: Enter interactive error recovery mode.
 
+    * `"waitForRecovery"`: Enter interactive error recovery mode. You can then
+      perform error recovery with `POST /runs/{id}/commands` and exit error
+      recovery mode with `POST /runs/{id}/actions`.
+
+    * `"assumeFalsePositiveAndContinue"`: Continue the run without interruption, acting
+      as if the error was a false positive.
+
+      This is equivalent to doing `"waitForRecovery"`
+      and then sending `actionType: "resume-from-recovery-assuming-false-positive"`
+      to `POST /runs/{id}/actions`, except this requires no ongoing intervention from
+      the client.
+
+    * `"ignoreAndContinue"`: Continue the run without interruption, accepting whatever
+      state the error left the robot in.
+
+      This is equivalent to doing `"waitForRecovery"`
+      and then sending `actionType: "resume-from-recovery"` to `POST /runs/{id}/actions`,
+      except this requires no ongoing intervention from the client.
+
+      This is probably not useful very often because it's likely to cause downstream
+      errors—imagine trying an `aspirate` command after a failed `pickUpTip` command.
+      This is provided for symmetry.
     """
 
-    IGNORE_AND_CONTINUE = "ignoreAndContinue"
     FAIL_RUN = "failRun"
     WAIT_FOR_RECOVERY = "waitForRecovery"
+    ASSUME_FALSE_POSITIVE_AND_CONTINUE = "assumeFalsePositiveAndContinue"
+    # todo(mm, 2024-10-22): "ignoreAndContinue" may be a misnomer now: is
+    # "assumeFalsePositiveAndContinue" not also a way to "ignore"? Consider renaming.
+    IGNORE_AND_CONTINUE = "ignoreAndContinue"
 
 
 class ErrorMatcher(BaseModel):
@@ -69,10 +92,7 @@ class ErrorRecoveryRule(BaseModel):
         ...,
         description="The criteria that must be met for this rule to be applied.",
     )
-    ifMatch: ReactionIfMatch = Field(
-        ...,
-        description="How to handle errors matched by this rule.",
-    )
+    ifMatch: ReactionIfMatch
 
 
 class ErrorRecoveryPolicy(BaseModel):
diff --git a/robot-server/robot_server/runs/router/base_router.py b/robot-server/robot_server/runs/router/base_router.py
index 788ca44aa1c..c108fa60a74 100644
--- a/robot-server/robot_server/runs/router/base_router.py
+++ b/robot-server/robot_server/runs/router/base_router.py
@@ -442,7 +442,6 @@ async def update_run(
         See `PATCH /errorRecovery/settings`.
         """
     ),
-    status_code=status.HTTP_201_CREATED,
     responses={
         status.HTTP_200_OK: {"model": SimpleEmptyBody},
         status.HTTP_409_CONFLICT: {"model": ErrorBody[RunStopped]},
diff --git a/robot-server/robot_server/runs/run_controller.py b/robot-server/robot_server/runs/run_controller.py
index 903bf22f252..1619cd20a08 100644
--- a/robot-server/robot_server/runs/run_controller.py
+++ b/robot-server/robot_server/runs/run_controller.py
@@ -2,6 +2,7 @@
 import logging
 from datetime import datetime
 from typing import Optional
+from typing_extensions import assert_never
 from opentrons.protocol_engine import ProtocolEngineError
 from opentrons_shared_data.errors.exceptions import RoboticsInteractionError
 
@@ -96,6 +97,17 @@ def create_action(
             elif action_type == RunActionType.RESUME_FROM_RECOVERY:
                 self._run_orchestrator_store.resume_from_recovery()
 
+            elif (
+                action_type
+                == RunActionType.RESUME_FROM_RECOVERY_ASSUMING_FALSE_POSITIVE
+            ):
+                # todo(mm, 2024-10-23): Connect to work in
+                # https://github.com/Opentrons/opentrons/pull/16556.
+                self._run_orchestrator_store.resume_from_recovery()
+
+            else:
+                assert_never(action_type)
+
         except ProtocolEngineError as e:
             raise RunActionNotAllowedError(message=e.message, wrapping=[e]) from e