From f009308450fdc6df6b0642a0f3179307fb8617d7 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Sat, 17 Jul 2021 15:45:09 -0400 Subject: [PATCH 1/2] client: avoid acting on stale data after launch When the client launches, use a consistent read to fetch its own allocs, but allow stale read afterwards as long as reads don't revert into older state. This change addresses an edge case affecting restarting client. When a client restarts, it may fetch a stale data concerning its allocs: allocs that have completed prior to the client shutdown may still have "run/running" desired/client status, and have the client attempt to re-run again. An alternative approach is to track the indices such that the client set MinQueryIndex on the maximum index the client ever saw, or compare received allocs against locally restored client state. Garbage collection complicates this approach (local knowledge is not complete), and the approach still risks starting "dead" allocations (e.g. the allocation may have been placed when client just restarted and have already been reschuled by the time the client started. This approach here is effective against all kinds of stalness problems with small overhead. --- client/client.go | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/client/client.go b/client/client.go index 61ebed24b9f..2442bdb227d 100644 --- a/client/client.go +++ b/client/client.go @@ -2002,8 +2002,15 @@ func (c *Client) watchAllocations(updates chan *allocUpdates) { NodeID: c.NodeID(), SecretID: c.secretNodeID(), QueryOptions: structs.QueryOptions{ - Region: c.Region(), - AllowStale: true, + Region: c.Region(), + + // Make a consistent read query when the client starts + // to avoid acting on stale data that predates this + // client state before a client restart. + // + // After the first request, only require monotonically + // increasing state. + AllowStale: false, }, } var resp structs.NodeClientAllocsResponse @@ -2153,7 +2160,8 @@ OUTER: c.logger.Debug("updated allocations", "index", resp.Index, "total", len(resp.Allocs), "pulled", len(allocsResp.Allocs), "filtered", len(filtered)) - // Update the query index. + // After the first request, only require monotonically increasing state. + req.AllowStale = true if resp.Index > req.MinQueryIndex { req.MinQueryIndex = resp.Index } From f917f08f9e3b7f69311dfc40adcaec2f626c3525 Mon Sep 17 00:00:00 2001 From: Mahmood Ali Date: Mon, 19 Jul 2021 14:52:43 -0400 Subject: [PATCH 2/2] changelog --- .changelog/10907.txt | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .changelog/10907.txt diff --git a/.changelog/10907.txt b/.changelog/10907.txt new file mode 100644 index 00000000000..a709b2dc445 --- /dev/null +++ b/.changelog/10907.txt @@ -0,0 +1,4 @@ +```release-note:bug +client: Fixed a bug where a restarted client may start an already completed tasks in rare conditions +``` +