From 0813de8971f86b4256a486b4cff22465b6398329 Mon Sep 17 00:00:00 2001 From: Michael Schurter Date: Sun, 31 Oct 2021 12:18:02 -0700 Subject: [PATCH 1/2] core: bump rejected plans from debug -> info As we have continued to see reports of #9506 we need to elevate this log line as it is the only way to detect when plans are being *erroneously* rejected. Users who see this log line repeatedly should drain and restart the node in the log line. This seems to work around the issue. Please post any details on #9506! --- .changelog/11416.txt | 3 +++ nomad/plan_apply.go | 8 +++++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 .changelog/11416.txt diff --git a/.changelog/11416.txt b/.changelog/11416.txt new file mode 100644 index 00000000000..92d4cbf4ab0 --- /dev/null +++ b/.changelog/11416.txt @@ -0,0 +1,3 @@ +```release-note:improvement +core: Elevated rejected node plan log lines to help diagnose #9506 +``` diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 19c8f1b56a6..4394b94df2a 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -477,7 +477,13 @@ func evaluatePlanPlacements(pool *EvaluatePool, snap *state.StateSnapshot, plan if !fit { // Log the reason why the node's allocations could not be made if reason != "" { - logger.Debug("plan for node rejected", "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID) + //TODO This was debug level and should return + //to debug level in the future. However until + //https://github.com/hashicorp/nomad/issues/9506 + //is resolved this log line is the only way to + //monitor the disagreement between workers and + //the plan applier. 
+ logger.Info("plan for node rejected", "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID) } // Set that this is a partial commit partialCommit = true From 4e3d66cb17b9e4380b3fede293eb8348fa7afee6 Mon Sep 17 00:00:00 2001 From: Luiz Aoqui Date: Tue, 2 Nov 2021 20:43:54 -0400 Subject: [PATCH 2/2] add `/s/port-plan-failure` redirect and link to it in plan reject log message --- nomad/plan_apply.go | 3 ++- website/redirects.js | 8 ++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/nomad/plan_apply.go b/nomad/plan_apply.go index 4394b94df2a..7e97bd603fd 100644 --- a/nomad/plan_apply.go +++ b/nomad/plan_apply.go @@ -483,7 +483,8 @@ func evaluatePlanPlacements(pool *EvaluatePool, snap *state.StateSnapshot, plan //is resolved this log line is the only way to //monitor the disagreement between workers and //the plan applier. - logger.Info("plan for node rejected", "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID) + logger.Info("plan for node rejected, refer to https://www.nomadproject.io/s/port-plan-failure for more information", + "node_id", nodeID, "reason", reason, "eval_id", plan.EvalID) } // Set that this is a partial commit partialCommit = true diff --git a/website/redirects.js b/website/redirects.js index 6b30950a843..929ee9f9042 100644 --- a/website/redirects.js +++ b/website/redirects.js @@ -189,6 +189,14 @@ module.exports = [ permanent: true, }, + // /s/* redirects for useful links that need a stable URL but we may need to + // change its destination in the future. + { + source: '/s/port-plan-failure', + destination: 'https://github.com/hashicorp/nomad/issues/9506', + permanent: false, + }, + // Spark guide links are all repointed to deprecated nomad-spark repo { source: '/guides/spark',