Skip to content

Commit

Permalink
Uses canonical CANCELLING states for run cancellations (#8245)
Browse files Browse the repository at this point in the history
  • Loading branch information
anticorrelator authored and github-actions[bot] committed Jan 26, 2023
1 parent 85be85d commit 0442db2
Show file tree
Hide file tree
Showing 5 changed files with 106 additions and 27 deletions.
15 changes: 14 additions & 1 deletion src/prefect/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ async def check_for_cancelled_flow_runs(self):
async for work_queue in self.get_work_queues():
work_queue_names.add(work_queue.name)

cancelling_flow_runs = await self.client.read_flow_runs(
named_cancelling_flow_runs = await self.client.read_flow_runs(
flow_run_filter=FlowRunFilter(
state=FlowRunFilterState(
type=FlowRunFilterStateType(any_=[StateType.CANCELLED]),
Expand All @@ -283,6 +283,19 @@ async def check_for_cancelled_flow_runs(self):
),
)

typed_cancelling_flow_runs = await self.client.read_flow_runs(
flow_run_filter=FlowRunFilter(
state=FlowRunFilterState(
type=FlowRunFilterStateType(any_=[StateType.CANCELLING]),
),
work_queue_name=FlowRunFilterWorkQueueName(any_=list(work_queue_names)),
# Avoid duplicate cancellation calls
id=FlowRunFilterId(not_any_=list(self.cancelling_flow_run_ids)),
),
)

cancelling_flow_runs = named_cancelling_flow_runs + typed_cancelling_flow_runs

if cancelling_flow_runs:
self.logger.info(
f"Found {len(cancelling_flow_runs)} flow runs awaiting cancellation."
Expand Down
2 changes: 1 addition & 1 deletion src/prefect/cli/flow_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ async def delete(id: UUID):
async def cancel(id: UUID):
"""Cancel a flow run by ID."""
async with get_client() as client:
cancelling_state = State(type=StateType.CANCELLED, name="Cancelling")
cancelling_state = State(type=StateType.CANCELLING)
try:
result = await client.set_flow_run_state(
flow_run_id=id, state=cancelling_state
Expand Down
9 changes: 9 additions & 0 deletions src/prefect/states.py
Original file line number Diff line number Diff line change
Expand Up @@ -498,6 +498,15 @@ def Crashed(cls: Type[State] = State, **kwargs) -> State:
return schemas.states.Crashed(cls=cls, **kwargs)


def Cancelling(cls: Type[State] = State, **kwargs) -> State:
    """Build a `Cancelling` state.

    This is a thin wrapper: the work is delegated to
    `schemas.states.Cancelling`, which receives `cls` and every extra
    keyword argument unchanged.

    Args:
        cls: the state class forwarded to `schemas.states.Cancelling`;
            defaults to `State`.
        **kwargs: additional keyword arguments forwarded as-is.

    Returns:
        State: a Cancelling state
    """
    cancelling_state = schemas.states.Cancelling(cls=cls, **kwargs)
    return cancelling_state


def Cancelled(cls: Type[State] = State, **kwargs) -> State:
"""Convenience function for creating `Cancelled` states.
Expand Down
104 changes: 81 additions & 23 deletions tests/agent/test_agent_run_cancellation.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,15 @@
from prefect.infrastructure.base import Infrastructure
from prefect.orion.database.orm_models import ORMDeployment
from prefect.orion.schemas.core import Deployment
from prefect.states import Cancelled, Completed, Pending, Running, Scheduled
from prefect.states import Cancelled, Cancelling, Completed, Pending, Running, Scheduled
from prefect.testing.utilities import AsyncMock
from prefect.utilities.dispatch import get_registry_for_type


def legacy_named_cancelling_state(**kwargs):
    """Build the legacy form of a cancelling state.

    The legacy form is a `Cancelled` state whose name is "Cancelling".
    Any keyword arguments are passed straight through to `Cancelled`.
    """
    legacy_state = Cancelled(name="Cancelling", **kwargs)
    return legacy_state


async def _create_test_deployment_from_orm(
orion_client: OrionClient, orm_deployment: ORMDeployment, **kwargs
) -> Deployment:
Expand All @@ -42,12 +46,15 @@ async def _create_test_deployment_from_orm(
# Test cancellation is called for the correct flow runs -------------------------------


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_called_for_cancelling_run(
orion_client: OrionClient, deployment: ORMDeployment
orion_client: OrionClient, deployment: ORMDeployment, cancelling_constructor
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(
Expand Down Expand Up @@ -89,15 +96,20 @@ async def test_agent_cancel_run_not_called_for_other_states(
agent.cancel_run.assert_not_called()


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_called_for_cancelling_run_with_multiple_work_queues(
orion_client: OrionClient, deployment: ORMDeployment
orion_client: OrionClient,
deployment: ORMDeployment,
cancelling_constructor,
):
deployment.work_queue_name = "foo"
await orion_client.update_deployment(deployment)

flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(work_queues=["foo", "bar"], prefetch_seconds=10) as agent:
Expand All @@ -107,8 +119,13 @@ async def test_agent_cancel_run_called_for_cancelling_run_with_multiple_work_que
agent.cancel_run.assert_awaited_once_with(flow_run)


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_called_for_each_cancelling_run_in_multiple_work_queues(
orion_client: OrionClient, deployment: ORMDeployment
orion_client: OrionClient,
deployment: ORMDeployment,
cancelling_constructor,
):
deployment_foo = await _create_test_deployment_from_orm(
orion_client, deployment, work_queue_name="foo"
Expand All @@ -119,11 +136,11 @@ async def test_agent_cancel_run_called_for_each_cancelling_run_in_multiple_work_

flow_run_foo = await orion_client.create_flow_run_from_deployment(
deployment_foo.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)
flow_run_bar = await orion_client.create_flow_run_from_deployment(
deployment_bar.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(work_queues=["foo", "bar"], prefetch_seconds=10) as agent:
Expand All @@ -135,24 +152,27 @@ async def test_agent_cancel_run_called_for_each_cancelling_run_in_multiple_work_
)


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_called_for_each_cancelling_run_in_a_work_queue(
orion_client: OrionClient, deployment: ORMDeployment
orion_client: OrionClient, deployment: ORMDeployment, cancelling_constructor
):
deployment_foo = await _create_test_deployment_from_orm(
orion_client, deployment, work_queue_name="foo"
)

flow_run_1 = await orion_client.create_flow_run_from_deployment(
deployment_foo.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)
flow_run_2 = await orion_client.create_flow_run_from_deployment(
deployment_foo.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)
flow_run_3 = await orion_client.create_flow_run_from_deployment(
deployment_foo.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(work_queues=["foo"], prefetch_seconds=10) as agent:
Expand All @@ -164,12 +184,15 @@ async def test_agent_cancel_run_called_for_each_cancelling_run_in_a_work_queue(
)


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_not_called_for_other_work_queues(
orion_client: OrionClient, deployment
orion_client: OrionClient, deployment, cancelling_constructor
):
await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(
Expand Down Expand Up @@ -201,14 +224,18 @@ def mock_infrastructure_kill(monkeypatch) -> Generator[AsyncMock, None, None]:
yield mock


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_kills_run_with_infrastructure_pid(
orion_client: OrionClient,
deployment: ORMDeployment,
mock_infrastructure_kill: AsyncMock,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
Expand All @@ -221,15 +248,19 @@ async def test_agent_cancel_run_kills_run_with_infrastructure_pid(
mock_infrastructure_kill.assert_awaited_once_with("test")


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_with_missing_infrastructure_pid(
orion_client: OrionClient,
deployment: ORMDeployment,
mock_infrastructure_kill: AsyncMock,
caplog,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

async with OrionAgent(
Expand All @@ -252,13 +283,17 @@ async def test_agent_cancel_run_with_missing_infrastructure_pid(


@pytest.mark.usefixtures("mock_infrastructure_kill")
@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_updates_state_name(
orion_client: OrionClient,
deployment: ORMDeployment,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
Expand All @@ -273,15 +308,19 @@ async def test_agent_cancel_run_updates_state_name(


@pytest.mark.usefixtures("mock_infrastructure_kill")
@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_preserves_other_state_properties(
orion_client: OrionClient,
deployment: ORMDeployment,
cancelling_constructor,
):
expected_changed_fields = {"name", "timestamp", "id"}

flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling", message="test"),
state=cancelling_constructor(message="test"),
)

await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
Expand All @@ -297,15 +336,19 @@ async def test_agent_cancel_run_preserves_other_state_properties(
) == flow_run.state.dict(exclude=expected_changed_fields)


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_with_infrastructure_not_available_during_kill(
orion_client: OrionClient,
deployment: ORMDeployment,
mock_infrastructure_kill: AsyncMock,
caplog,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
Expand Down Expand Up @@ -333,15 +376,19 @@ async def test_agent_cancel_run_with_infrastructure_not_available_during_kill(
assert post_flow_run.state.message is None


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_with_infrastructure_not_found_during_kill(
orion_client: OrionClient,
deployment: ORMDeployment,
mock_infrastructure_kill: AsyncMock,
caplog,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)

await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
Expand All @@ -368,15 +415,19 @@ async def test_agent_cancel_run_with_infrastructure_not_found_during_kill(
assert post_flow_run.state.message is None


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_with_unknown_error_during_kill(
orion_client: OrionClient,
deployment: ORMDeployment,
mock_infrastructure_kill: AsyncMock,
caplog,
cancelling_constructor,
):
flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)
await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")
mock_infrastructure_kill.side_effect = ValueError("Oh no!")
Expand All @@ -402,8 +453,15 @@ async def test_agent_cancel_run_with_unknown_error_during_kill(
assert "Traceback" in caplog.text


@pytest.mark.parametrize(
"cancelling_constructor", [legacy_named_cancelling_state, Cancelling]
)
async def test_agent_cancel_run_without_infrastructure_support_for_kill(
orion_client: OrionClient, deployment: ORMDeployment, caplog, monkeypatch
orion_client: OrionClient,
deployment: ORMDeployment,
caplog,
monkeypatch,
cancelling_constructor,
):

# Patch all infrastructure types
Expand All @@ -415,7 +473,7 @@ async def test_agent_cancel_run_without_infrastructure_support_for_kill(

flow_run = await orion_client.create_flow_run_from_deployment(
deployment.id,
state=Cancelled(name="Cancelling"),
state=cancelling_constructor(),
)
await orion_client.update_flow_run(flow_run.id, infrastructure_pid="test")

Expand Down
3 changes: 1 addition & 2 deletions tests/cli/test_flow_run.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,8 +246,7 @@ async def test_non_terminal_states_set_to_cancelled(self, orion_client, state):
after = await orion_client.read_flow_run(before.id)
assert before.state.name != after.state.name
assert before.state.type != after.state.type
assert after.state.name == "Cancelling"
assert after.state.type == StateType.CANCELLED
assert after.state.type == StateType.CANCELLING

@pytest.mark.parametrize("state", [Completed, Failed, Crashed, Cancelled])
async def test_cancelling_terminal_states_exits_with_error(
Expand Down

0 comments on commit 0442db2

Please sign in to comment.