Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix ambiguous networks #1831

Merged
merged 9 commits into from
Sep 14, 2020
156 changes: 147 additions & 9 deletions pkg/cluster/internal/providers/docker/network.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,13 +17,17 @@ limitations under the License.
package docker

import (
"bytes"
"crypto/sha1"
"encoding/binary"
"errors"
"encoding/json"
"io"
"net"
"regexp"
"sort"
"strings"

"sigs.k8s.io/kind/pkg/errors"
"sigs.k8s.io/kind/pkg/exec"
)

Expand All @@ -42,13 +46,15 @@ const fixedNetworkName = "kind"

// ensureNetwork checks if docker network by name exists, if not it creates it
func ensureNetwork(name string) error {
// TODO: the network might already exist and not have ipv6 ... :|
// discussion: https://github.com/kubernetes-sigs/kind/pull/1508#discussion_r414594198
exists, err := checkIfNetworkExists(name)
// check if network exists already and remove any duplicate networks
exists, err := removeDuplicateNetworks(name)
BenTheElder marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return err
}

// network already exists, we're good
// TODO: the network might already exist and not have ipv6 ... :|
// discussion: https://github.com/kubernetes-sigs/kind/pull/1508#discussion_r414594198
if exists {
return nil
}
Expand All @@ -57,7 +63,7 @@ func ensureNetwork(name string) error {
// obtained from the ULA fc00::/8 range
// Make N attempts with "probing" in case we happen to collide
subnet := generateULASubnetFromName(name, 0)
err = createNetwork(name, subnet)
err = createNetworkNoDuplicates(name, subnet)
if err == nil {
// Success!
return nil
Expand All @@ -69,8 +75,19 @@ func ensureNetwork(name string) error {
// If it is, make more attempts below
if isIPv6UnavailableError(err) {
// only one attempt, IPAM is automatic in ipv4 only
return createNetwork(name, "")
} else if !isPoolOverlapError(err) {
return createNetworkNoDuplicates(name, "")
BenTheElder marked this conversation as resolved.
Show resolved Hide resolved
} else if isPoolOverlapError(err) {
// pool overlap suggests perhaps another process created the network
// check if network exists already and remove any duplicate networks
exists, err := checkIfNetworkExists(name)
if err != nil {
return err
}
if exists {
return nil
}
// otherwise we'll start trying with different subnets
} else {
// unknown error ...
return err
}
Expand All @@ -79,18 +96,50 @@ func ensureNetwork(name string) error {
const maxAttempts = 5
for attempt := int32(1); attempt < maxAttempts; attempt++ {
subnet := generateULASubnetFromName(name, attempt)
err = createNetwork(name, subnet)
err = createNetworkNoDuplicates(name, subnet)
if err == nil {
// success!
return nil
} else if !isPoolOverlapError(err) {
} else if isPoolOverlapError(err) {
// pool overlap suggests perhaps another process created the network
// check if network exists already and remove any duplicate networks
exists, err := checkIfNetworkExists(name)
if err != nil {
return err
}
if exists {
return nil
}
// otherwise we'll try again
} else {
// unknown error ...
return err
}
}
return errors.New("exhausted attempts trying to find a non-overlapping subnet")
}

func createNetworkNoDuplicates(name, ipv6Subnet string) error {
if err := createNetwork(name, ipv6Subnet); err != nil && !isNetworkAlreadyExistsError(err) {
return err
}
_, err := removeDuplicateNetworks(name)
return err
}

func removeDuplicateNetworks(name string) (bool, error) {
networks, err := sortedNetworksWithName(name)
BenTheElder marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return false, err
}
if len(networks) > 1 {
if err := deleteNetworks(networks[1:]...); err != nil && !isOnlyErrorNoSuchNetwork(err) {
return false, err
}
}
return len(networks) > 0, nil
}

func createNetwork(name, ipv6Subnet string) error {
if ipv6Subnet == "" {
return exec.Command("docker", "network", "create", "-d=bridge",
Expand All @@ -102,6 +151,55 @@ func createNetwork(name, ipv6Subnet string) error {
"--ipv6", "--subnet", ipv6Subnet, name).Run()
}

func sortedNetworksWithName(name string) ([]string, error) {
// list all networks by this name
lsOut, err := exec.Output(exec.Command(
"docker", "network", "ls",
"--filter=name=^"+regexp.QuoteMeta(name)+"$",
"--format={{.ID}}", // output as unambiguous IDs
))
if err != nil {
return nil, err
}

// now inspect each
ids := strings.Split(strings.TrimSuffix(string(lsOut), "\n"), "\n")
inspectOut, err := exec.Output(exec.Command("docker", append([]string{"network", "inspect"}, ids...)...))
BenTheElder marked this conversation as resolved.
Show resolved Hide resolved
// NOTE: a network could be deleted between the ls and inspect calls, that's fine
if err != nil && !isOnlyErrorNoSuchNetwork(err) {
return nil, err
}

// parse
type networkInspectEntry struct {
ID string `json:"Id"`
// NOTE: we don't care about the contents here but we need to parse
// how many entries exist in the containers map
Containers map[string]map[string]string `json:"Containers"`
}

networks := []networkInspectEntry{}
if err := json.Unmarshal(inspectOut, &networks); err != nil {
return nil, errors.Wrap(err, "failed to decode networks list")
}

// deterministically sort networks
// NOTE: THIS PART IS IMPORTANT!
sort.Slice(networks, func(i, j int) bool {
if len(networks[i].Containers) < len(networks[j].Containers) {
BenTheElder marked this conversation as resolved.
Show resolved Hide resolved
return true
}
return networks[i].ID < networks[j].ID
})

// return network IDs
sortedIDs := make([]string, 0, len(networks))
for i := range networks {
sortedIDs = append(sortedIDs, networks[i].ID)
}
return sortedIDs, nil
}

func checkIfNetworkExists(name string) (bool, error) {
out, err := exec.Output(exec.Command(
"docker", "network", "ls",
Expand All @@ -121,6 +219,46 @@ func isPoolOverlapError(err error) bool {
return rerr != nil && strings.HasPrefix(string(rerr.Output), "Error response from daemon: Pool overlaps with other one on this address space")
}

func isNetworkAlreadyExistsError(err error) bool {
rerr := exec.RunErrorForError(err)
return rerr != nil && strings.HasPrefix(string(rerr.Output), "Error response from daemon: network with name") && strings.Contains(string(rerr.Output), "already exists")
}

// returns true if:
// - err is nil
// - err only contains no such network errors
func isOnlyErrorNoSuchNetwork(err error) bool {
rerr := exec.RunErrorForError(err)
if rerr == nil {
return false
}
// check all lines of output from errored command
b := bytes.NewBuffer(rerr.Output)
for {
l, err := b.ReadBytes('\n')
if err == io.EOF {
break
} else if err != nil {
return false
}
// if the line begins with Eror: No such network: it's fine
s := string(l)
if strings.HasPrefix(s, "Error: No such network:") {
continue
}
// other errors are not fine
if strings.HasPrefix(s, "Error: ") {
return false
}
// other line contents should just be network references
}
return true
}

func deleteNetworks(networks ...string) error {
return exec.Command("docker", append([]string{"network", "rm"}, networks...)...).Run()
}

// generateULASubnetFromName generate an IPv6 subnet based on the
// name and Nth probing attempt
func generateULASubnetFromName(name string, attempt int32) string {
Expand Down
56 changes: 56 additions & 0 deletions pkg/cluster/internal/providers/docker/network_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,9 +18,65 @@ package docker

import (
"fmt"
"regexp"
"strings"
"testing"

"sigs.k8s.io/kind/pkg/errors"
"sigs.k8s.io/kind/pkg/exec"

"sigs.k8s.io/kind/pkg/internal/integration"
)

func TestIntegrationEnsureNetworkConcurrent(t *testing.T) {
Copy link
Member Author

@BenTheElder BenTheElder Sep 11, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can consider reworking the make targets based on this pattern in a follow-up.

to run only "unit": go test -short ...
to run only "integration": go test -run ^TestIntegration ...

integration.MaybeSkip(t)

testNetworkName := "integration-test-ensure-kind-network"

// cleanup
cleanup := func() {
exec.Command("docker", "network", "rm", testNetworkName).Run()
}
cleanup()
defer cleanup()

// this is more than enough to trigger race conditions
networkConcurrency := 10

// Create multiple networks concurrently
errCh := make(chan error, networkConcurrency)
for i := 0; i < networkConcurrency; i++ {
go func() {
errCh <- ensureNetwork(testNetworkName)
}()
}
for i := 0; i < networkConcurrency; i++ {
if err := <-errCh; err != nil {
t.Errorf("error creating network: %v", err)
rerr := exec.RunErrorForError(err)
if rerr != nil {
t.Errorf("%q", rerr.Output)
t.Errorf("%v", strings.HasPrefix(string(rerr.Output), "Error response from daemon: network with name") && strings.HasSuffix(string(rerr.Output), "already exists\n"))
}
t.Errorf("%+v", errors.StackTrace(err))
}
}

cmd := exec.Command(
"docker", "network", "ls",
fmt.Sprintf("--filter=name=^%s$", regexp.QuoteMeta(testNetworkName)),
"--format={{.Name}}",
)

lines, err := exec.OutputLines(cmd)
if err != nil {
t.Errorf("obtaining the docker networks")
}
if len(lines) != 1 {
t.Errorf("wrong number of networks created: %d", len(lines))
}
}

func Test_generateULASubnetFromName(t *testing.T) {
t.Parallel()
cases := []struct {
Expand Down
33 changes: 33 additions & 0 deletions pkg/internal/integration/integration.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
Copyright 2020 The Kubernetes Authors.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package integration

import "testing"

// *testing.T methods used by assert
type testingDotT interface {
Skip(args ...interface{})
}

// MaybeSkip skips if integration tests should be skipped
// currently this is when testing.Short() is true
// This should be called at the beginning of an integration test
func MaybeSkip(t testingDotT) {
if testing.Short() {
t.Skip("Skipping integration test due to -short")
}
}