From 845600234cb5e98c95ad8c3ce743bafbb46ffc2b Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 13 Aug 2020 05:41:10 +0200 Subject: [PATCH] podman networking Since 0.8.0 KIND uses custom networks with docker to leverage the embedded DNS server and other features. This provides the same functionality for podman; however, due to the different networking implementations of the two (libnetwork vs CNI), podman uses a different networking model for clusters. It creates one network per cluster with the same lifecycle as the cluster, i.e. the network is deleted when the cluster is deleted. fix load balancer for podman: docker uses a custom DNS server listening on 127.0.0.11:53 for custom networks; however, podman does not. The loadbalancer docker image does not have the same "network magic" we are using for the node images, thus we need to configure a different resolver depending on the provider. Signed-off-by: Antonio Ojea asd --- .../internal/create/actions/config/config.go | 2 +- pkg/cluster/internal/delete/delete.go | 17 +-- pkg/cluster/internal/loadbalancer/config.go | 5 +- .../internal/providers/docker/provider.go | 9 +- .../internal/providers/podman/network.go | 124 ++++++++++++++++++ pkg/cluster/internal/providers/podman/node.go | 6 +- .../internal/providers/podman/provider.go | 70 +++++----- .../internal/providers/podman/provision.go | 13 +- pkg/cluster/internal/providers/podman/util.go | 19 ++- pkg/cluster/internal/providers/provider.go | 6 +- 10 files changed, 204 insertions(+), 67 deletions(-) create mode 100644 pkg/cluster/internal/providers/podman/network.go diff --git a/pkg/cluster/internal/create/actions/config/config.go b/pkg/cluster/internal/create/actions/config/config.go index a233c31409..0f0e97a121 100644 --- a/pkg/cluster/internal/create/actions/config/config.go +++ b/pkg/cluster/internal/create/actions/config/config.go @@ -200,7 +200,7 @@ func getKubeadmConfig(cfg *config.Cluster, data kubeadm.ConfigData, node nodes.N // configure the right protocol 
addresses if cfg.Networking.IPFamily == "ipv6" { if nodeAddressIPv6 == "" { - return "", errors.Errorf("failed to get IPV6 address; is the docker daemon configured to use IPV6 correctly?") + return "", errors.Errorf("failed to get IPV6 address; is the provider configured to use IPV6 correctly?") } data.NodeAddress = nodeAddressIPv6 } diff --git a/pkg/cluster/internal/delete/delete.go b/pkg/cluster/internal/delete/delete.go index 550cc457e4..bc533be31e 100644 --- a/pkg/cluster/internal/delete/delete.go +++ b/pkg/cluster/internal/delete/delete.go @@ -17,7 +17,6 @@ limitations under the License. package delete import ( - "sigs.k8s.io/kind/pkg/errors" "sigs.k8s.io/kind/pkg/log" "sigs.k8s.io/kind/pkg/cluster/internal/kubeconfig" @@ -28,22 +27,16 @@ import ( // explicitKubeconfigPath is --kubeconfig, following the rules from // https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands func Cluster(logger log.Logger, p providers.Provider, name, explicitKubeconfigPath string) error { - n, err := p.ListNodes(name) + err := p.DeleteCluster(name) if err != nil { - return errors.Wrap(err, "error listing nodes") - } - - kerr := kubeconfig.Remove(name, explicitKubeconfigPath) - if kerr != nil { - logger.Errorf("failed to update kubeconfig: %v", kerr) + return err } - err = p.DeleteNodes(n) + err = kubeconfig.Remove(name, explicitKubeconfigPath) if err != nil { + logger.Errorf("failed to update kubeconfig: %v", err) return err } - if kerr != nil { - return err - } + return nil } diff --git a/pkg/cluster/internal/loadbalancer/config.go b/pkg/cluster/internal/loadbalancer/config.go index 71e17388b3..9293dc9d43 100644 --- a/pkg/cluster/internal/loadbalancer/config.go +++ b/pkg/cluster/internal/loadbalancer/config.go @@ -37,8 +37,9 @@ global log /dev/log local1 notice daemon -resolvers docker +resolvers kinddns nameserver dns 127.0.0.11:53 + parse-resolv-conf defaults log global @@ -62,7 +63,7 @@ backend kube-apiservers option httpchk GET /healthz # TODO: we should be 
verifying (!) {{range $server, $address := .BackendServers}} - server {{ $server }} {{ $address }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} + server {{ $server }} {{ $address }} check check-ssl verify none resolvers kinddns resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} {{- end}} ` diff --git a/pkg/cluster/internal/providers/docker/provider.go b/pkg/cluster/internal/providers/docker/provider.go index 4e0a9e3e1e..c2fac76156 100644 --- a/pkg/cluster/internal/providers/docker/provider.go +++ b/pkg/cluster/internal/providers/docker/provider.go @@ -131,8 +131,13 @@ func (p *provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *provider) DeleteNodes(n []nodes.Node) error { +// DeleteCluster is part of the providers.Provider interface +func (p *provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") + } + if len(n) == 0 { return nil } diff --git a/pkg/cluster/internal/providers/podman/network.go b/pkg/cluster/internal/providers/podman/network.go new file mode 100644 index 0000000000..776dd2b10e --- /dev/null +++ b/pkg/cluster/internal/providers/podman/network.go @@ -0,0 +1,124 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package podman + +import ( + "crypto/sha1" + "encoding/binary" + "fmt" + "net" + "strings" + + "sigs.k8s.io/kind/pkg/errors" + "sigs.k8s.io/kind/pkg/exec" +) + +// By default podman creates one network per cluster; this allows using +// DNS to resolve container names and use the corresponding IP family, since +// podman does not support dual stack containers yet in the `podman network` command. +// However, podman uses CNI, and it is possible to create a CNI config file +// manually for podman to provide dual-stack if necessary. +// +// For now this also makes it easier for apps to join the same network, and +// leaves users with complex networking desires to create and manage their own +// networks. +const fixedNetworkPrefix = "kind" + +// ensureNetwork creates a new network with the prefix + cluster name +func ensureNetwork(name string, isIPv6 bool) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + // TODO: revisit for dual stack + subnet := "" + if isIPv6 { + // generate unique subnet per network based on the name + // obtained from the ULA fc00::/8 range + // Make N attempts with "probing" in case we happen to collide + subnet = generateULASubnetFromName(networkName, 0) + } + err := createNetwork(networkName, subnet) + if err == nil { + // Success! + return nil + } + + // Only continue if the error is because the subnet range + // is already allocated + if !isPoolOverlapError(err) { + return err + } + + // keep trying for ipv6 subnets + const maxAttempts = 5 + for attempt := int32(1); attempt < maxAttempts; attempt++ { + subnet := generateULASubnetFromName(networkName, attempt) + err = createNetwork(networkName, subnet) + if err == nil { + // success! + return nil + } else if !isPoolOverlapError(err) { + // unknown error ... 
+ return err + } + } + return errors.New("exhausted attempts trying to find a non-overlapping subnet") + +} + +func createNetwork(name, subnet string) error { + if subnet != "" { + return exec.Command("podman", "network", "create", "-d=bridge", + "--subnet", subnet, name).Run() + } + return exec.Command("podman", "network", "create", "-d=bridge", + name).Run() +} + +// delete network if exists, otherwise do nothing +func deleteNetwork(name string) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + if err := exec.Command("podman", "network", "inspect", networkName).Run(); err != nil { + return nil + } + return exec.Command("podman", "network", "rm", networkName).Run() +} + +func isPoolOverlapError(err error) bool { + rerr := exec.RunErrorForError(err) + return rerr != nil && + (strings.Contains(string(rerr.Output), "is being used by a network interface") || + strings.Contains(string(rerr.Output), "is already being used by a cni configuration")) +} + +// generateULASubnetFromName generate an IPv6 subnet based on the +// name and Nth probing attempt +func generateULASubnetFromName(name string, attempt int32) string { + ip := make([]byte, 16) + ip[0] = 0xfc + ip[1] = 0x00 + h := sha1.New() + _, _ = h.Write([]byte(name)) + _ = binary.Write(h, binary.LittleEndian, attempt) + bs := h.Sum(nil) + for i := 2; i < 8; i++ { + ip[i] = bs[i] + } + subnet := &net.IPNet{ + IP: net.IP(ip), + Mask: net.CIDRMask(64, 128), + } + return subnet.String() +} diff --git a/pkg/cluster/internal/providers/podman/node.go b/pkg/cluster/internal/providers/podman/node.go index 4a530f08b2..8c29898406 100644 --- a/pkg/cluster/internal/providers/podman/node.go +++ b/pkg/cluster/internal/providers/podman/node.go @@ -52,10 +52,8 @@ func (n *node) Role() (string, error) { func (n *node) IP() (ipv4 string, ipv6 string, err error) { // retrieve the IP address of the node using podman inspect - cmd := exec.Command("podman", "inspect", - "-f", 
"{{.NetworkSettings.IPAddress}},{{.NetworkSettings.GlobalIPv6Address}}", - n.name, // ... against the "node" container - ) + format := `{{range .NetworkSettings.Networks}}{{.IPAddress}},{{.GlobalIPv6Address}}{{end}}` + cmd := exec.Command("podman", "inspect", "-f", format, n.name) lines, err := exec.OutputLines(cmd) if err != nil { return "", "", errors.Wrap(err, "failed to get container details") diff --git a/pkg/cluster/internal/providers/podman/provider.go b/pkg/cluster/internal/providers/podman/provider.go index b338491b94..4ed0e415a3 100644 --- a/pkg/cluster/internal/providers/podman/provider.go +++ b/pkg/cluster/internal/providers/podman/provider.go @@ -79,6 +79,10 @@ func (p *provider) Provision(status *cli.Status, cfg *config.Cluster) (err error return err } + if err := ensureNetwork(cfg.Name, clusterIsIPv6(cfg)); err != nil { + return errors.Wrap(err, "failed to ensure podman network") + } + // actually provision the cluster icons := strings.Repeat("📦 ", len(cfg.Nodes)) status.Start(fmt.Sprintf("Preparing nodes %s", icons)) @@ -133,33 +137,41 @@ func (p *provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *provider) DeleteNodes(n []nodes.Node) error { - if len(n) == 0 { - return nil - } - const command = "podman" - args := make([]string, 0, len(n)+3) // allocate once - args = append(args, - "rm", - "-f", // force the container to be delete now - "-v", // delete volumes - ) - for _, node := range n { - args = append(args, node.String()) - } - if err := exec.Command(command, args...).Run(); err != nil { - return errors.Wrap(err, "failed to delete nodes") +// DeleteCluster is part of the providers.Provider interface +func (p *provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") } - var nodeVolumes []string - for _, node := range n { - volumes, err := getVolumes(node.String()) - 
if err != nil { + + // delete nodes + if len(n) != 0 { + const command = "podman" + args := make([]string, 0, len(n)+3) // allocate once + args = append(args, + "rm", + "-f", // force the container to be delete now + "-v", // delete volumes + ) + for _, node := range n { + args = append(args, node.String()) + } + if err := exec.Command(command, args...).Run(); err != nil { + return errors.Wrap(err, "failed to delete nodes") + } + var nodeVolumes []string + for _, node := range n { + volumes, err := getVolumes(node.String()) + if err != nil { + return err + } + nodeVolumes = append(nodeVolumes, volumes...) + } + if err := deleteVolumes(nodeVolumes); err != nil { return err } - nodeVolumes = append(nodeVolumes, volumes...) } - return deleteVolumes(nodeVolumes) + return deleteNetwork(name) } // GetAPIServerEndpoint is part of the providers.Provider interface @@ -245,16 +257,10 @@ func (p *provider) GetAPIServerInternalEndpoint(cluster string) (string, error) } n, err := nodeutils.APIServerEndpointNode(allNodes) if err != nil { - return "", errors.Wrap(err, "failed to get apiserver endpoint") - } - // TODO: check cluster IP family and return the correct IP - // This means IPv6 singlestack is broken on podman - ipv4, _, err := n.IP() - if err != nil { - return "", errors.Wrap(err, "failed to get apiserver IP") + return "", errors.Wrap(err, "failed to get api server endpoint") } - return net.JoinHostPort(ipv4, fmt.Sprintf("%d", common.APIServerInternalPort)), nil - + // NOTE: we're using the nodes's hostnames which are their names + return net.JoinHostPort(n.String(), fmt.Sprintf("%d", common.APIServerInternalPort)), nil } // node returns a new node handle for this provider diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index f5fd591463..1a1fb2de36 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -140,6 +140,8 @@ func 
commonArgs(cfg *config.Cluster) ([]string, error) { args := []string{ "--detach", // run the container detached "--tty", // allocate a tty for entrypoint logs + // attach to its own network + "--network", fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name), // label the node with the cluster ID "--label", fmt.Sprintf("%s=%s", clusterLabelKey, cfg.Name), } @@ -258,8 +260,9 @@ func getProxyEnv(cfg *config.Cluster) (map[string]string, error) { envs := common.GetProxyEnvs(cfg) // Specifically add the podman network subnets to NO_PROXY if we are using a proxy if len(envs) > 0 { - // podman default bridge network is named "bridge" (https://docs.podman.com/network/bridge/#use-the-default-bridge-network) - subnets, err := getSubnets("bridge") + // podman creates a network per cluster; getSubnets adds the network prefix + // to the cluster name itself, so pass the bare name to avoid "kind-kind-<name>" + subnets, err := getSubnets(cfg.Name) if err != nil { return nil, err } @@ -277,8 +280,10 @@ func getProxyEnv(cfg *config.Cluster) (map[string]string, error) { return envs, nil } -func getSubnets(networkName string) ([]string, error) { - format := `{{range (index (index . 
"Subnet"}} {{end}}` +func getSubnets(name string) ([]string, error) { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + // TODO(dualstack): podman currently only supports one range per network + format := `{{range (index (index .plugins 0).ipam.ranges 0)}}{{.subnet}}{{end}}` cmd := exec.Command("podman", "network", "inspect", "-f", format, networkName) lines, err := exec.CombinedOutputLines(cmd) if err != nil { diff --git a/pkg/cluster/internal/providers/podman/util.go b/pkg/cluster/internal/providers/podman/util.go index f3d19d0f21..2dceb6c57c 100644 --- a/pkg/cluster/internal/providers/podman/util.go +++ b/pkg/cluster/internal/providers/podman/util.go @@ -93,16 +93,23 @@ func getVolumes(label string) ([]string, error) { "volume", "ls", "--filter", fmt.Sprintf("label=%s", label), - "--quiet") + "--format", "{{.Name}}") // `output` from the above command is names of all volumes each followed by `\n`. - output, err := exec.Output(cmd) + output, err := exec.OutputLines(cmd) if err != nil { return nil, err } - // Trim away the last `\n`. - trimmedOutput := strings.TrimSuffix(string(output), "\n") - // Get names of all volumes by splitting via `\n`. 
- return strings.Split(string(trimmedOutput), "\n"), nil + + // remove volumes with empty name + // https://github.com/golang/go/wiki/SliceTricks#filter-in-place + n := 0 + for _, v := range output { + if v != "" { + output[n] = v + n++ + } + } + return output[:n], nil } func deleteVolumes(names []string) error { diff --git a/pkg/cluster/internal/providers/provider.go b/pkg/cluster/internal/providers/provider.go index 6e28c4dc79..5df353f4d3 100644 --- a/pkg/cluster/internal/providers/provider.go +++ b/pkg/cluster/internal/providers/provider.go @@ -35,10 +35,8 @@ type Provider interface { // ListNodes returns the nodes under this provider for the given // cluster name, they may or may not be running correctly ListNodes(cluster string) ([]nodes.Node, error) - // DeleteNodes deletes the provided list of nodes - // These should be from results previously returned by this provider - // E.G. by ListNodes() - DeleteNodes([]nodes.Node) error + // DeleteCluster deletes the cluster resources (nodes and/or network) + DeleteCluster(cluster string) error // GetAPIServerEndpoint returns the host endpoint for the cluster's API server GetAPIServerEndpoint(cluster string) (string, error) // GetAPIServerEndpoint returns the internal network endpoint for the cluster's API server