Skip to content

Commit

Permalink
Allow wildcard datacenters to be specified in job file (#11170)
Browse files Browse the repository at this point in the history
Also allows for default value of `datacenters = ["*"]`
  • Loading branch information
jmwilkinson authored Feb 2, 2023
1 parent 41065ef commit 46f3977
Show file tree
Hide file tree
Showing 16 changed files with 103 additions and 46 deletions.
7 changes: 7 additions & 0 deletions .changelog/11170.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
```release-note:breaking-change
config: the `datacenter` field for agent configuration no longer accepts the `*` character as part of the datacenter name
```

```release-note:improvement
jobspec: the `datacenters` field now accepts wildcards
```
4 changes: 2 additions & 2 deletions command/agent/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -315,8 +315,8 @@ func (c *Command) IsValidConfig(config, cmdConfig *Config) bool {
}

// Check that the datacenter name does not contain invalid characters
if strings.ContainsAny(config.Datacenter, "\000") {
c.Ui.Error("Datacenter contains invalid characters")
if strings.ContainsAny(config.Datacenter, "\000*") {
c.Ui.Error("Datacenter contains invalid characters (null or '*')")
return false
}

Expand Down
7 changes: 5 additions & 2 deletions command/agent/command_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ func TestCommand_MetaConfigValidation(t *testing.T) {
}
}

func TestCommand_NullCharInDatacenter(t *testing.T) {
func TestCommand_InvalidCharInDatacenter(t *testing.T) {
ci.Parallel(t)

tmpDir := t.TempDir()
Expand All @@ -157,6 +157,9 @@ func TestCommand_NullCharInDatacenter(t *testing.T) {
"char-\\000-in-the-middle",
"ends-with-\\000",
"\\000-at-the-beginning",
"char-*-in-the-middle",
"ends-with-*",
"*-at-the-beginning",
}
for _, tc := range tcases {
configFile := filepath.Join(tmpDir, "conf1.hcl")
Expand Down Expand Up @@ -188,7 +191,7 @@ func TestCommand_NullCharInDatacenter(t *testing.T) {
}

out := ui.ErrorWriter.String()
exp := "Datacenter contains invalid characters"
exp := "Datacenter contains invalid characters (null or '*')"
if !strings.Contains(out, exp) {
t.Fatalf("expect to find %q\n\n%s", exp, out)
}
Expand Down
1 change: 0 additions & 1 deletion command/assets/connect-short.nomad
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
job "countdash" {
datacenters = ["dc1"]

group "api" {
network {
Expand Down
5 changes: 3 additions & 2 deletions command/assets/connect.nomad
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ job "countdash" {
# region = "global"
#
# The "datacenters" parameter specifies the list of datacenters which should
# be considered when placing this task. This must be provided.
datacenters = ["dc1"]
# be considered when placing this task. This accepts wildcards and defaults
# allowing placement on all datacenters.
datacenters = ["*"]

# The "type" parameter controls the type of job, which impacts the scheduler's
# decision on placement. This configuration is optional and defaults to
Expand Down
1 change: 0 additions & 1 deletion command/assets/example-short.nomad
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
job "example" {
datacenters = ["dc1"]

group "cache" {
network {
Expand Down
5 changes: 3 additions & 2 deletions command/assets/example.nomad
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@ job "example" {
# region = "global"
#
# The "datacenters" parameter specifies the list of datacenters which should
# be considered when placing this task. This must be provided.
datacenters = ["dc1"]
# be considered when placing this task. This accepts wildcards and defaults
# allowing placement on all datacenters.
datacenters = ["*"]

# The "type" parameter controls the type of job, which impacts the scheduler's
# decision on placement. This configuration is optional and defaults to
Expand Down
2 changes: 1 addition & 1 deletion command/testdata/example-short-bad.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"Job": {
"Region": null,
"Namespace": null,
"ID": "example",
"ID": "bad example",
"Name": "example",
"Type": null,
"Priority": null,
Expand Down
2 changes: 1 addition & 1 deletion nomad/node_endpoint.go
Original file line number Diff line number Diff line change
Expand Up @@ -1621,7 +1621,7 @@ func (n *Node) createNodeEvals(node *structs.Node, nodeIndex uint64) ([]string,
// datacenter cardinality tends to be low so the check
// shouldn't add much work.
for _, dc := range job.Datacenters {
if dc == node.Datacenter {
if node.IsInDC(dc) {
sysJobs = append(sysJobs, job)
break
}
Expand Down
9 changes: 9 additions & 0 deletions nomad/structs/structs.go
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ import (
psstructs "github.com/hashicorp/nomad/plugins/shared/structs"
"github.com/miekg/dns"
"github.com/mitchellh/copystructure"
"github.com/ryanuber/go-glob"
"golang.org/x/crypto/blake2b"
"golang.org/x/exp/maps"
"golang.org/x/exp/slices"
Expand Down Expand Up @@ -2294,6 +2295,10 @@ func (n *Node) ComparableResources() *ComparableResources {
}
}

func (n *Node) IsInDC(dc string) bool {
return glob.Glob(dc, n.Datacenter)
}

// Stub returns a summarized version of the node
func (n *Node) Stub(fields *NodeStubFields) *NodeListStub {

Expand Down Expand Up @@ -4364,6 +4369,10 @@ func (j *Job) Canonicalize() {
j.Namespace = DefaultNamespace
}

if len(j.Datacenters) == 0 {
j.Datacenters = []string{"*"}
}

for _, tg := range j.TaskGroups {
tg.Canonicalize(j)
}
Expand Down
6 changes: 3 additions & 3 deletions nomad/structs/structs_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ func TestJob_Validate(t *testing.T) {
Name: "my-job",
Type: JobTypeService,
Priority: 50,
Datacenters: []string{"dc1"},
Datacenters: []string{"*"},
TaskGroups: []*TaskGroup{
{
Name: "web",
Expand Down Expand Up @@ -91,7 +91,7 @@ func TestJob_Validate(t *testing.T) {
"group 3 missing name",
"Task group web validation failed",
)
// test for empty datacenters
// test for invalid datacenters
j = &Job{
Datacenters: []string{""},
}
Expand Down Expand Up @@ -372,7 +372,7 @@ func testJob() *Job {
Type: JobTypeService,
Priority: 50,
AllAtOnce: false,
Datacenters: []string{"dc1"},
Datacenters: []string{"*"},
Constraints: []*Constraint{
{
LTarget: "$attr.kernel.name",
Expand Down
10 changes: 5 additions & 5 deletions scheduler/generic_sched_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
"github.com/hashicorp/nomad/nomad/mock"
"github.com/hashicorp/nomad/nomad/structs"
"github.com/hashicorp/nomad/testutil"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"golang.org/x/exp/slices"
Expand Down Expand Up @@ -756,7 +757,7 @@ func TestServiceSched_Spread(t *testing.T) {
remaining := uint8(100 - start)
// Create a job that uses spread over data center
job := mock.Job()
job.Datacenters = []string{"dc1", "dc2"}
job.Datacenters = []string{"dc*"}
job.TaskGroups[0].Count = 10
job.TaskGroups[0].Spreads = append(job.TaskGroups[0].Spreads,
&structs.Spread{
Expand Down Expand Up @@ -1107,10 +1108,9 @@ func TestServiceSched_JobRegister_AllocFail(t *testing.T) {
t.Fatalf("bad: %#v", metrics)
}

// Check the available nodes
if count, ok := metrics.NodesAvailable["dc1"]; !ok || count != 0 {
t.Fatalf("bad: %#v", metrics)
}
_, ok = metrics.NodesAvailable["dc1"]
must.False(t, ok, must.Sprintf(
"expected NodesAvailable metric to be unpopulated when there are no nodes"))

// Check queued allocations
queued := outEval.QueuedAllocations["web"]
Expand Down
15 changes: 7 additions & 8 deletions scheduler/util.go
Original file line number Diff line number Diff line change
Expand Up @@ -361,10 +361,7 @@ func diffSystemAllocs(
// mapping of each data center to the count of ready nodes.
func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]struct{}, map[string]int, error) {
// Index the DCs
dcMap := make(map[string]int, len(dcs))
for _, dc := range dcs {
dcMap[dc] = 0
}
dcMap := make(map[string]int)

// Scan the nodes
ws := memdb.NewWatchSet()
Expand All @@ -386,11 +383,13 @@ func readyNodesInDCs(state State, dcs []string) ([]*structs.Node, map[string]str
notReady[node.ID] = struct{}{}
continue
}
if _, ok := dcMap[node.Datacenter]; !ok {
continue
for _, dc := range dcs {
if node.IsInDC(dc) {
out = append(out, node)
dcMap[node.Datacenter]++
break
}
}
out = append(out, node)
dcMap[node.Datacenter]++
}
return out, notReady, dcMap, nil
}
Expand Down
55 changes: 39 additions & 16 deletions scheduler/util_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import (
"time"

"github.com/hashicorp/nomad/ci"
"github.com/shoenig/test/must"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"

Expand Down Expand Up @@ -560,25 +561,47 @@ func TestReadyNodesInDCs(t *testing.T) {
node3.Datacenter = "dc2"
node3.Status = structs.NodeStatusDown
node4 := mock.DrainNode()
node5 := mock.Node()
node5.Datacenter = "not-this-dc"

require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1000, node1))
require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1001, node2))
require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1002, node3))
require.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1003, node4))
must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1000, node1)) // dc1 ready
must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1001, node2)) // dc2 ready
must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1002, node3)) // dc2 not ready
must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1003, node4)) // dc2 not ready
must.NoError(t, state.UpsertNode(structs.MsgTypeTestSetup, 1004, node5)) // ready never match

nodes, notReady, dc, err := readyNodesInDCs(state, []string{"dc1", "dc2"})
require.NoError(t, err)
require.Equal(t, 2, len(nodes))
require.NotEqual(t, node3.ID, nodes[0].ID)
require.NotEqual(t, node3.ID, nodes[1].ID)

require.Contains(t, dc, "dc1")
require.Equal(t, 1, dc["dc1"])
require.Contains(t, dc, "dc2")
require.Equal(t, 1, dc["dc2"])
testCases := []struct {
name string
datacenters []string
expectReady []*structs.Node
expectNotReady map[string]struct{}
expectIndex map[string]int
}{
{
name: "no wildcards",
datacenters: []string{"dc1", "dc2"},
expectReady: []*structs.Node{node1, node2},
expectNotReady: map[string]struct{}{node3.ID: struct{}{}, node4.ID: struct{}{}},
expectIndex: map[string]int{"dc1": 1, "dc2": 1},
},
{
name: "with wildcard",
datacenters: []string{"dc*"},
expectReady: []*structs.Node{node1, node2},
expectNotReady: map[string]struct{}{node3.ID: struct{}{}, node4.ID: struct{}{}},
expectIndex: map[string]int{"dc1": 1, "dc2": 1},
},
}

require.Contains(t, notReady, node3.ID)
require.Contains(t, notReady, node4.ID)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
ready, notReady, dcIndex, err := readyNodesInDCs(state, tc.datacenters)
must.NoError(t, err)
must.SliceContainsAll(t, tc.expectReady, ready, must.Sprint("expected ready to match"))
must.Eq(t, tc.expectNotReady, notReady, must.Sprint("expected not-ready to match"))
must.Eq(t, tc.expectIndex, dcIndex, must.Sprint("expected datacenter counts to match"))
})
}
}

func TestRetryMax(t *testing.T) {
Expand Down
6 changes: 4 additions & 2 deletions website/content/docs/job-specification/job.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,10 @@ job "docs" {
to define criteria for spreading allocations across a node attribute or metadata.
See the [Nomad spread reference][spread] for more details.

- `datacenters` `(array<string>: <required>)` - A list of datacenters in the region which are eligible
for task placement. This must be provided, and does not have a default.
- `datacenters` `(array<string>: ["*"])` - A list of datacenters in the region
which are eligible for task placement. This field allows wildcard globbing
through the use of `*` for multi-character matching. The default value is
`["*"]`, which allows the job to be placed in any available datacenter.

- `group` <code>([Group][group]: &lt;required&gt;)</code> - Specifies the start of a
group of tasks. This can be provided multiple times to define additional
Expand Down
14 changes: 14 additions & 0 deletions website/content/docs/upgrade/upgrade-specific.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,20 @@ from the Nomad client by setting [`set_environment_variables`][artifact_env].
The use of filesystem isolation can be disabled in Client configuration by
setting [`disable_filesystem_isolation`][artifact_fs_isolation].

#### Datacenter Wildcards

In Nomad 1.5.0, the
[`datacenters`][/nomad/docs/job-specification/job#datacenters] field for a job
accepts wildcards for multi-character matching. For example, `datacenters =
["dc*"]` will match all datacenters that start with `"dc"`. The default value
for `datacenters` is now `["*"]`, so the field can be omitted.

The `*` character is no longer a legal character in the
[`datacenter`][/nomad/docs/configuration#datacenter] field for an agent
configuration. Before upgrading to Nomad 1.5.0, you should first ensure that
you've updated any jobs that currently have a `*` in their datacenter name and
then ensure that no agents have this character in their `datacenter` field name.

#### Server `rejoin_after_leave` (default: `false`) now enforced

All Nomad versions prior to v1.5.0 have incorrectly ignored the Server
Expand Down

0 comments on commit 46f3977

Please sign in to comment.