From 9cd0819ec9e6697dc3687fe45fb00b69ec35bdd9 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 13 Aug 2020 05:41:10 +0200 Subject: [PATCH] podman networking Since 0.8.0 KIND uses custom networks with docker to leverage the embedded DNS server and other features. This provides the same functionality for podman; however, due to the different networking implementations of the two (libnetwork vs CNI), podman uses a different networking model for clusters. It creates one network per cluster with the same lifecycle as the cluster, i.e. the network is deleted when the cluster is deleted. --- .../internal/create/actions/config/config.go | 2 +- pkg/cluster/internal/delete/delete.go | 17 +-- .../internal/providers/docker/provider.go | 9 +- .../internal/providers/podman/network.go | 120 ++++++++++++++++++ pkg/cluster/internal/providers/podman/node.go | 7 +- .../internal/providers/podman/provider.go | 31 +++-- .../internal/providers/podman/provision.go | 9 +- .../internal/providers/provider/provider.go | 6 +- 8 files changed, 166 insertions(+), 35 deletions(-) create mode 100644 pkg/cluster/internal/providers/podman/network.go diff --git a/pkg/cluster/internal/create/actions/config/config.go b/pkg/cluster/internal/create/actions/config/config.go index eff060f415..8aca331968 100644 --- a/pkg/cluster/internal/create/actions/config/config.go +++ b/pkg/cluster/internal/create/actions/config/config.go @@ -197,7 +197,7 @@ func getKubeadmConfig(cfg *config.Cluster, data kubeadm.ConfigData, node nodes.N // configure the right protocol addresses if cfg.Networking.IPFamily == "ipv6" { if nodeAddressIPv6 == "" { - return "", errors.Errorf("failed to get IPV6 address; is the docker daemon configured to use IPV6 correctly?") + return "", errors.Errorf("failed to get IPV6 address; is the provider configured to use IPV6 correctly?") } data.NodeAddress = nodeAddressIPv6 } diff --git a/pkg/cluster/internal/delete/delete.go b/pkg/cluster/internal/delete/delete.go index 4cafc2db49..04fa8d583b 
100644 --- a/pkg/cluster/internal/delete/delete.go +++ b/pkg/cluster/internal/delete/delete.go @@ -17,7 +17,6 @@ limitations under the License. package delete import ( - "sigs.k8s.io/kind/pkg/errors" "sigs.k8s.io/kind/pkg/log" "sigs.k8s.io/kind/pkg/cluster/internal/kubeconfig" @@ -28,22 +27,16 @@ import ( // explicitKubeconfigPath is --kubeconfig, following the rules from // https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands func Cluster(logger log.Logger, p provider.Provider, name, explicitKubeconfigPath string) error { - n, err := p.ListNodes(name) + err := p.DeleteCluster(name) if err != nil { - return errors.Wrap(err, "error listing nodes") - } - - kerr := kubeconfig.Remove(name, explicitKubeconfigPath) - if kerr != nil { - logger.Errorf("failed to update kubeconfig: %v", kerr) + return err } - err = p.DeleteNodes(n) + err = kubeconfig.Remove(name, explicitKubeconfigPath) if err != nil { + logger.Errorf("failed to update kubeconfig: %v", err) return err } - if kerr != nil { - return err - } + return nil } diff --git a/pkg/cluster/internal/providers/docker/provider.go b/pkg/cluster/internal/providers/docker/provider.go index ae9635be04..f771058f72 100644 --- a/pkg/cluster/internal/providers/docker/provider.go +++ b/pkg/cluster/internal/providers/docker/provider.go @@ -124,8 +124,13 @@ func (p *Provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *Provider) DeleteNodes(n []nodes.Node) error { +// DeleteCluster is part of the providers.Provider interface +func (p *Provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") + } + if len(n) == 0 { return nil } diff --git a/pkg/cluster/internal/providers/podman/network.go b/pkg/cluster/internal/providers/podman/network.go new file mode 100644 index 0000000000..dec00c4324 --- /dev/null +++ 
b/pkg/cluster/internal/providers/podman/network.go @@ -0,0 +1,120 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podman + +import ( + "crypto/sha1" + "encoding/binary" + "errors" + "fmt" + "net" + "strings" + + "sigs.k8s.io/kind/pkg/exec" +) + +// For podman, one network is created per cluster; this allows using +// DNS to resolve container names and the corresponding IP family, since +// `podman network` does not support dual-stack containers yet. +// However, podman uses CNI, and it is possible to create a CNI config file +// manually for podman to provide dual-stack if necessary. +// +// For now this also makes it easier for apps to join the same network, and +// leaves users with complex networking desires to create and manage their own +// networks. +const fixedNetworkPrefix = "kind" + +// ensureNetwork creates a new network with the prefix + cluster name +func ensureNetwork(name string, isIPv6 bool) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + // TODO: revisit for dual stack + subnet := "" + if isIPv6 { + // generate unique subnet per network based on the name + // obtained from the ULA fc00::/8 range + // Make N attempts with "probing" in case we happen to collide + subnet = generateULASubnetFromName(networkName, 0) + } + err := createNetwork(networkName, subnet) + if err == nil { + // Success! 
+ return nil + } + + // Only retry for IPv6, and only if the error is because the subnet range + // is already allocated; IPv4 networks are created without an explicit subnet + if !isIPv6 || !isPoolOverlapError(err) { + return err + } + + // keep trying for ipv6 subnets + const maxAttempts = 5 + for attempt := int32(1); attempt < maxAttempts; attempt++ { + subnet := generateULASubnetFromName(networkName, attempt) + err = createNetwork(networkName, subnet) + if err == nil { + // success! + return nil + } else if !isPoolOverlapError(err) { + // unknown error ... + return err + } + } + return errors.New("exhausted attempts trying to find a non-overlapping subnet") + +} + +func createNetwork(name, subnet string) error { + if subnet != "" { + return exec.Command("podman", "network", "create", "-d=bridge", + "--subnet", subnet, name).Run() + } + return exec.Command("podman", "network", "create", "-d=bridge", + name).Run() +} + +func deleteNetwork(name string) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + return exec.Command("podman", "network", "rm", networkName).Run() +} + +func isPoolOverlapError(err error) bool { + rerr := exec.RunErrorForError(err) + return rerr != nil && + (strings.Contains(string(rerr.Output), "is being used by a network interface") || + strings.Contains(string(rerr.Output), "is already being used by a cni configuration")) +} + +// generateULASubnetFromName generates an IPv6 subnet based on the +// name and Nth probing attempt +func generateULASubnetFromName(name string, attempt int32) string { + ip := make([]byte, 16) + ip[0] = 0xfc + ip[1] = 0x00 + h := sha1.New() + _, _ = h.Write([]byte(name)) + _ = binary.Write(h, binary.LittleEndian, attempt) + bs := h.Sum(nil) + for i := 2; i < 8; i++ { + ip[i] = bs[i] + } + subnet := &net.IPNet{ + IP: net.IP(ip), + Mask: net.CIDRMask(64, 128), + } + return subnet.String() +} diff --git a/pkg/cluster/internal/providers/podman/node.go 
b/pkg/cluster/internal/providers/podman/node.go index 4a530f08b2..6d1ad3c567 100644 --- 
a/pkg/cluster/internal/providers/podman/node.go +++ b/pkg/cluster/internal/providers/podman/node.go @@ -53,7 +53,7 @@ func (n *node) Role() (string, error) { func (n *node) IP() (ipv4 string, ipv6 string, err error) { // retrieve the IP address of the node using podman inspect cmd := exec.Command("podman", "inspect", - "-f", "{{.NetworkSettings.IPAddress}},{{.NetworkSettings.GlobalIPv6Address}}", + "-f", "{{range .NetworkSettings.Networks}}{{.IPAddress}},{{.GlobalIPv6Address}}{{end}}", n.name, // ... against the "node" container ) lines, err := exec.OutputLines(cmd) @@ -63,7 +63,10 @@ func (n *node) IP() (ipv4 string, ipv6 string, err error) { if len(lines) != 1 { return "", "", errors.Errorf("file should only be one line, got %d lines", len(lines)) } - ips := strings.Split(lines[0], ",") + // The format string is passed to podman without a shell, so it must not be + // wrapped in literal quotes; stripping any here is purely defensive + line := strings.ReplaceAll(lines[0], "\"", "") + ips := strings.Split(line, ",") if len(ips) != 2 { return "", "", errors.Errorf("container addresses should have 2 values, got %d values", len(ips)) } diff --git a/pkg/cluster/internal/providers/podman/provider.go b/pkg/cluster/internal/providers/podman/provider.go index 92b54ac887..7df7fa9929 100644 --- a/pkg/cluster/internal/providers/podman/provider.go +++ b/pkg/cluster/internal/providers/podman/provider.go @@ -72,6 +72,10 @@ func (p *Provider) Provision(status *cli.Status, cfg *config.Cluster) (err error return err } + if err := ensureNetwork(cfg.Name, clusterIsIPv6(cfg)); err != nil { + return errors.Wrap(err, "failed to ensure podman network") + } + // actually provision the cluster icons := strings.Repeat("📦 ", len(cfg.Nodes)) status.Start(fmt.Sprintf("Preparing nodes %s", icons)) @@ -126,8 +130,13 @@ func (p *Provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *Provider) 
DeleteNodes(n []nodes.Node) error { +// 
DeleteCluster is part of the providers.Provider interface +func (p *Provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") + } + if len(n) == 0 { return nil } @@ -152,7 +161,11 @@ func (p *Provider) DeleteNodes(n []nodes.Node) error { } nodeVolumes = append(nodeVolumes, volumes...) } - return deleteVolumes(nodeVolumes) + if err := deleteVolumes(nodeVolumes); err != nil { + return err + } + + return deleteNetwork(name) } // GetAPIServerEndpoint is part of the providers.Provider interface @@ -238,16 +251,10 @@ func (p *Provider) GetAPIServerInternalEndpoint(cluster string) (string, error) } n, err := nodeutils.APIServerEndpointNode(allNodes) if err != nil { - return "", errors.Wrap(err, "failed to get apiserver endpoint") - } - // TODO: check cluster IP family and return the correct IP - // This means IPv6 singlestack is broken on podman - ipv4, _, err := n.IP() - if err != nil { - return "", errors.Wrap(err, "failed to get apiserver IP") + return "", errors.Wrap(err, "failed to get api server endpoint") } - return net.JoinHostPort(ipv4, fmt.Sprintf("%d", common.APIServerInternalPort)), nil - + // NOTE: we're using the node's hostname, which is its name + return net.JoinHostPort(n.String(), fmt.Sprintf("%d", common.APIServerInternalPort)), nil } // node returns a new node handle for this provider diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index da3e6eff1c..4619ed792f 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -140,6 +140,8 @@ func commonArgs(cfg *config.Cluster) ([]string, error) { args := []string{ "--detach", // run the container detached "--tty", // allocate a tty for entrypoint logs + // attach to its own network + "--network", fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name), // label the node with the cluster ID 
"--label", fmt.Sprintf("%s=%s", clusterLabelKey, cfg.Name), } @@ -230,6 +232,8 @@ func runArgsForLoadBalancer(cfg *config.Cluster, name string, args []string) ([] "run", "--hostname", name, // make hostname match container name "--name", name, // ... and set the container name + // attach to its own network + "--network", fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name), // label the node with the role ID "--label", fmt.Sprintf("%s=%s", nodeRoleLabelKey, constants.ExternalLoadBalancerNodeRoleValue), }, @@ -258,8 +262,9 @@ func getProxyEnv(cfg *config.Cluster) (map[string]string, error) { envs := common.GetProxyEnvs(cfg) // Specifically add the podman network subnets to NO_PROXY if we are using a proxy if len(envs) > 0 { - // podman default bridge network is named "bridge" (https://docs.podman.com/network/bridge/#use-the-default-bridge-network) - subnets, err := getSubnets("bridge") + // podman creates a network per cluster + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name) + subnets, err := getSubnets(networkName) if err != nil { return nil, err } diff --git a/pkg/cluster/internal/providers/provider/provider.go b/pkg/cluster/internal/providers/provider/provider.go index 937b29ba03..cf912ceb68 100644 --- a/pkg/cluster/internal/providers/provider/provider.go +++ b/pkg/cluster/internal/providers/provider/provider.go @@ -35,10 +35,8 @@ type Provider interface { // ListNodes returns the nodes under this provider for the given // cluster name, they may or may not be running correctly ListNodes(cluster string) ([]nodes.Node, error) - // DeleteNodes deletes the provided list of nodes - // These should be from results previously returned by this provider - // E.G. 
by ListNodes() - DeleteNodes([]nodes.Node) error + // DeleteCluster deletes the cluster resources (nodes and/or network) + DeleteCluster(cluster string) error // GetAPIServerEndpoint returns the host endpoint for the cluster's API server GetAPIServerEndpoint(cluster string) (string, error) // GetAPIServerEndpoint returns the internal network endpoint for the cluster's API server