Fix scheduler transition error on memory->erred #8549

Merged
merged 26 commits on Mar 8, 2024

Changes from 2 commits
27 changes: 19 additions & 8 deletions distributed/scheduler.py
@@ -1964,6 +1964,7 @@
)

v = a_recs.get(key, finish)
# The inner rec has higher priority? Is that always desired?
Collaborator:
I don't get this comment?

Member Author:
This is a general comment about the two-step transitions. The recommendations created by the first step are executed before the second step, which may create weird state (as it did in this case).
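
To make the two-step mechanism concrete, here is a minimal sketch (not the actual Scheduler._transition code; `transitions` stands in for self._TRANSITIONS_TABLE and `two_step_transition` is a made-up name): when no direct (start, finish) transition exists, the scheduler goes via "released", and the recommendation produced by that first step can override the finish state the caller asked for.

def two_step_transition(transitions, key, start, finish, stimulus_id):
    # Step 1: transition start -> "released".
    func = transitions[start, "released"]
    a_recs, a_cmsgs, a_wmsgs = func(key, stimulus_id)

    # The first step may recommend a different target state for `key` than
    # the caller requested; that inner recommendation takes priority.
    v = a_recs.get(key, finish)

    # Step 2: transition "released" -> v (not necessarily the original finish).
    func = transitions["released", v]
    b_recs, b_cmsgs, b_wmsgs = func(key, stimulus_id)

    # The scheduler state already reflects step 1 before step 2 runs, which is
    # the "weird state" referred to in the comment above.
    return {**a_recs, **b_recs}, a_cmsgs + b_cmsgs, a_wmsgs + b_wmsgs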

func = self._TRANSITIONS_TABLE["released", v]
b_recs, b_cmsgs, b_wmsgs = func(self, key, stimulus_id)

@@ -2082,7 +2083,11 @@
assert not ts.who_has
assert not ts.processing_on
for dts in ts.dependencies:
assert dts.state not in {"forgotten", "erred"}
assert dts.state not in {"forgotten", "erred"}, (
ts,
dts,
self.transition_log,
)

if ts.has_lost_dependencies:
return {key: "forgotten"}, {}, {}
@@ -2480,7 +2485,9 @@
recommendations[key] = "forgotten"
elif ts.has_lost_dependencies:
recommendations[key] = "forgotten"
elif ts.who_wants or ts.waiters:
elif (ts.who_wants or ts.waiters) and not any(
dts.state in ("erred",) for dts in ts.dependencies
):
recommendations[key] = "waiting"

for dts in ts.waiters or ():
@@ -2505,14 +2512,13 @@
assert ts.exception_blame
assert not ts.who_has
assert not ts.waiting_on
assert not ts.waiters
Member Author:
This assertion does not work in two-step transitions.

failing_ts = ts.exception_blame
assert failing_ts

for dts in ts.dependents:
dts.exception_blame = failing_ts
if not dts.who_has:
dts.exception_blame = failing_ts

Codecov / codecov/patch: added line distributed/scheduler.py#L2521 was not covered by tests.
recommendations[dts.key] = "erred"

report_msg = {
@@ -2547,6 +2553,9 @@

for dts in ts.dependents:
if dts.state == "erred":
# Does this make sense?
# This goes via released
# dts -> released -> waiting
Collaborator:
Agree, this makes no sense to me either. Is there a unit test anywhere to shed light on it?

Member Author:
I haven't investigated this any further.

recommendations[dts.key] = "waiting"

w_msg = {
@@ -2621,8 +2630,8 @@
self,
key: Key,
stimulus_id: str,
worker: str | None = None,
Collaborator:
How can a task be processing without a worker? Is it when the worker it was processing on died and it caused the task to increase its suspicious count too much? If so, it may be a good idea to note it in a comment?

Member Author:
It appears that this has been superseded by other changes in this PR. I'll remove it and see if CI complains.
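
As a toy illustration of the scenario asked about above (this is not the scheduler's implementation; ToyTask, on_worker_death, and ALLOWED_FAILURES are made-up names, with the constant standing in for the distributed.scheduler.allowed-failures setting used in the test below): when the worker processing a task dies, the task's suspicious count grows, and once it exceeds the limit the task errs even though no worker is currently running it.

ALLOWED_FAILURES = 1  # analogous to distributed.scheduler.allowed-failures

class ToyTask:
    def __init__(self, key):
        self.key = key
        self.suspicious = 0       # times a worker died while running this task
        self.state = "processing"

def on_worker_death(task):
    # The worker running the task is gone, so there is no specific worker left
    # to blame if the task ends up erring.
    task.suspicious += 1
    if task.suspicious > ALLOWED_FAILURES:
        task.state = "erred"      # surfaces to the client as KilledWorker
    else:
        task.state = "released"   # rescheduled on another worker

t = ToyTask("f")
on_worker_death(t)  # first death: task is rescheduled
t.state = "processing"
on_worker_death(t)  # second death exceeds the limit: task errs without a worker
assert t.state == "erred"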

*,
worker: str,
cause: Key | None = None,
exception: Serialized | None = None,
traceback: Serialized | None = None,
@@ -2675,7 +2684,8 @@

if not ts.erred_on:
ts.erred_on = set()
ts.erred_on.add(worker)
if worker:
ts.erred_on.add(worker)
Collaborator:
I think the type in the function declaration should change to worker: str | None?

Member Author:
My bad, I didn't clean this up properly.

if exception is not None:
ts.exception = exception
ts.exception_text = exception_text
@@ -2699,8 +2709,9 @@
)

for dts in ts.dependents:
dts.exception_blame = failing_ts
recommendations[dts.key] = "erred"
if dts.who_has:
dts.exception_blame = failing_ts
recommendations[dts.key] = "erred"

Codecov / codecov/patch: added lines distributed/scheduler.py#L2713-L2714 were not covered by tests.

for dts in ts.dependencies:
if dts.waiters:
61 changes: 61 additions & 0 deletions distributed/tests/test_scheduler.py
@@ -4890,3 +4890,64 @@ async def test_resubmit_different_task_same_key_warns_only_once(

async with Worker(s.address):
assert await c.gather(zs) == [2, 3, 4] # Kept old ys


def block(x, in_event, block_event):
in_event.set()
block_event.wait()
return x


@gen_cluster(
client=True,
nthreads=[("", 1, {"resources": {"a": 1}}), ("", 1, {"resources": {"b": 1}})],
config={"distributed.scheduler.allowed-failures": 1},
Worker=Nanny,
)
async def test_fan_out_pattern_deadlock(c, s, a, b):
"""Regression test for https://github.com/dask/distributed/issues/8548

Conceptually, this test simulates a fan-out workload in which the worker that processed
the single input task of the fan-out dies once a fan-out task has been processed, but
before the scheduler recognizes that the input task has successfully been sent to another
worker that is now processing a fan-out task. Workers then continue to die while the
input tasks are being reprocessed.

This test heavily uses resources to force scheduling decisions.
"""
in_ancestor = Event()
block_ancestor = Event()
in_on_a_descendant = Event()
in_on_b_descendant = Event()
block_descendants = Event()
await block_ancestor.set()
f = c.submit(block, 1, in_ancestor, block_ancestor, key="f", resources={"a": 1})
g = c.submit(inc, f, key="g", resources={"a": 1})
h1 = c.submit(
block, g, in_on_a_descendant, block_descendants, key="h1", resources={"a": 1}
)
h2 = c.submit(
block, g, in_on_b_descendant, block_descendants, key="h2", resources={"b": 1}
)

await asyncio.gather(
wait_for_state("g", "memory", s),
in_on_a_descendant.wait(),
in_on_b_descendant.wait(),
)
await asyncio.gather(in_ancestor.clear(), block_ancestor.clear())
await a.process.process.kill()

await in_ancestor.wait()
await in_ancestor.clear()
await a.process.process.kill()

await in_ancestor.wait()
await in_ancestor.clear()
await a.process.process.kill()

await block_descendants.set()
# await block_ancestor.set()
await h2
with pytest.raises(KilledWorker, match="Attempted to run task 'h1'"):
await h1