From 20c03777726312dd58c7ea3cf986961a1addb3e3 Mon Sep 17 00:00:00 2001 From: Antonio Ojea Date: Thu, 13 Aug 2020 05:41:10 +0200 Subject: [PATCH] podman networking Since 0.8.0 KIND uses custom networks with docker to leverage the embedded DNS server and other features. This provides the same functionality for podman; however, due to the different networking implementations of the two (libnetwork vs CNI), podman uses a different networking model for clusters. It creates one network per cluster with the same lifecycle as the cluster, i.e. the network is deleted when the cluster is deleted. fix load balancer for podman docker uses a custom DNS server listening on 127.0.0.11:53 for custom networks; however, podman does not. The loadbalancer docker image does not have the same "network magic" we are using for the node images, thus we need to configure a different resolver depending on the provider. Signed-off-by: Antonio Ojea --- .../internal/create/actions/config/config.go | 2 +- .../actions/loadbalancer/loadbalancer.go | 9 ++ pkg/cluster/internal/delete/delete.go | 17 +-- pkg/cluster/internal/loadbalancer/config.go | 9 +- .../internal/providers/docker/provider.go | 9 +- .../internal/providers/podman/network.go | 124 ++++++++++++++++++ pkg/cluster/internal/providers/podman/node.go | 7 +- .../internal/providers/podman/provider.go | 70 +++++----- .../internal/providers/podman/provision.go | 13 +- pkg/cluster/internal/providers/podman/util.go | 8 +- .../internal/providers/provider/provider.go | 6 +- 11 files changed, 214 insertions(+), 60 deletions(-) create mode 100644 pkg/cluster/internal/providers/podman/network.go diff --git a/pkg/cluster/internal/create/actions/config/config.go b/pkg/cluster/internal/create/actions/config/config.go index aa5c1e7453..ef7aa275d9 100644 --- a/pkg/cluster/internal/create/actions/config/config.go +++ b/pkg/cluster/internal/create/actions/config/config.go @@ -208,7 +208,7 @@ func getKubeadmConfig(cfg *config.Cluster, data kubeadm.ConfigData, node 
nodes.N // configure the right protocol addresses if cfg.Networking.IPFamily == "ipv6" { if nodeAddressIPv6 == "" { - return "", errors.Errorf("failed to get IPV6 address; is the docker daemon configured to use IPV6 correctly?") + return "", errors.Errorf("failed to get IPV6 address; is the provider configured to use IPV6 correctly?") } data.NodeAddress = nodeAddressIPv6 } diff --git a/pkg/cluster/internal/create/actions/loadbalancer/loadbalancer.go b/pkg/cluster/internal/create/actions/loadbalancer/loadbalancer.go index 0c0c904f45..5fe825915c 100644 --- a/pkg/cluster/internal/create/actions/loadbalancer/loadbalancer.go +++ b/pkg/cluster/internal/create/actions/loadbalancer/loadbalancer.go @@ -26,6 +26,7 @@ import ( "sigs.k8s.io/kind/pkg/cluster/internal/create/actions" "sigs.k8s.io/kind/pkg/cluster/internal/loadbalancer" + "sigs.k8s.io/kind/pkg/cluster/internal/providers/podman" "sigs.k8s.io/kind/pkg/cluster/internal/providers/provider/common" "sigs.k8s.io/kind/pkg/cluster/nodeutils" ) @@ -74,11 +75,19 @@ func (a *Action) Execute(ctx *actions.ActionContext) error { backendServers[n.String()] = fmt.Sprintf("%s:%d", n.String(), common.APIServerInternalPort) } + // TODO(bentheelder): settle on a better abstraction for this + // While we don't want much code aware of the provider name, + // we do need it to configure the external loadbalancer + dockerProvider := true + if _, ok := ctx.Provider.(*podman.Provider); ok { + dockerProvider = false + } // create loadbalancer config data loadbalancerConfig, err := loadbalancer.Config(&loadbalancer.ConfigData{ ControlPlanePort: common.APIServerInternalPort, BackendServers: backendServers, IPv6: ctx.Config.Networking.IPFamily == config.IPv6Family, + Docker: dockerProvider, }) if err != nil { return errors.Wrap(err, "failed to generate loadbalancer config data") diff --git a/pkg/cluster/internal/delete/delete.go b/pkg/cluster/internal/delete/delete.go index 4cafc2db49..04fa8d583b 100644 --- 
a/pkg/cluster/internal/delete/delete.go +++ b/pkg/cluster/internal/delete/delete.go @@ -17,7 +17,6 @@ limitations under the License. package delete import ( - "sigs.k8s.io/kind/pkg/errors" "sigs.k8s.io/kind/pkg/log" "sigs.k8s.io/kind/pkg/cluster/internal/kubeconfig" @@ -28,22 +27,16 @@ import ( // explicitKubeconfigPath is --kubeconfig, following the rules from // https://kubernetes.io/docs/reference/generated/kubectl/kubectl-commands func Cluster(logger log.Logger, p provider.Provider, name, explicitKubeconfigPath string) error { - n, err := p.ListNodes(name) + err := p.DeleteCluster(name) if err != nil { - return errors.Wrap(err, "error listing nodes") - } - - kerr := kubeconfig.Remove(name, explicitKubeconfigPath) - if kerr != nil { - logger.Errorf("failed to update kubeconfig: %v", kerr) + return err } - err = p.DeleteNodes(n) + err = kubeconfig.Remove(name, explicitKubeconfigPath) if err != nil { + logger.Errorf("failed to update kubeconfig: %v", err) return err } - if kerr != nil { - return err - } + return nil } diff --git a/pkg/cluster/internal/loadbalancer/config.go b/pkg/cluster/internal/loadbalancer/config.go index 71e17388b3..4fdeec8c58 100644 --- a/pkg/cluster/internal/loadbalancer/config.go +++ b/pkg/cluster/internal/loadbalancer/config.go @@ -28,6 +28,7 @@ type ConfigData struct { ControlPlanePort int BackendServers map[string]string IPv6 bool + Docker bool } // DefaultConfigTemplate is the loadbalancer config template @@ -37,8 +38,12 @@ global log /dev/log local1 notice daemon -resolvers docker +resolvers kinddns + {{ if $.Docker -}} nameserver dns 127.0.0.11:53 + {{- else -}} + parse-resolv-conf + {{- end }} defaults log global @@ -62,7 +67,7 @@ backend kube-apiservers option httpchk GET /healthz # TODO: we should be verifying (!) 
{{range $server, $address := .BackendServers}} - server {{ $server }} {{ $address }} check check-ssl verify none resolvers docker resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} + server {{ $server }} {{ $address }} check check-ssl verify none resolvers kinddns resolve-prefer {{ if $.IPv6 -}} ipv6 {{- else -}} ipv4 {{- end }} {{- end}} ` diff --git a/pkg/cluster/internal/providers/docker/provider.go b/pkg/cluster/internal/providers/docker/provider.go index 762541d8e3..4389238045 100644 --- a/pkg/cluster/internal/providers/docker/provider.go +++ b/pkg/cluster/internal/providers/docker/provider.go @@ -124,8 +124,13 @@ func (p *Provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *Provider) DeleteNodes(n []nodes.Node) error { +// DeleteCluster is part of the providers.Provider interface +func (p *Provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") + } + if len(n) == 0 { return nil } diff --git a/pkg/cluster/internal/providers/podman/network.go b/pkg/cluster/internal/providers/podman/network.go new file mode 100644 index 0000000000..776dd2b10e --- /dev/null +++ b/pkg/cluster/internal/providers/podman/network.go @@ -0,0 +1,124 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +package podman + +import ( + "crypto/sha1" + "encoding/binary" + "fmt" + "net" + "strings" + + "sigs.k8s.io/kind/pkg/errors" + "sigs.k8s.io/kind/pkg/exec" +) + +// By default podman creates one network per cluster, this allows to use +// DNS to resolve container names and use the corresponding IP family, since +// podman does not support dual stack containers yet in the `podman network` +// However, podman uses CNI, and it is possible to creates a CNI config file +// manually for podman to provide dual-stack if necessary. +// +// For now this also makes it easier for apps to join the same network, and +// leaves users with complex networking desires to create and manage their own +// networks. +const fixedNetworkPrefix = "kind" + +// ensureNetwork creates a new network with the prefix + cluster name +func ensureNetwork(name string, isIPv6 bool) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + // TODO: revisit for dual stack + subnet := "" + if isIPv6 { + // generate unique subnet per network based on the name + // obtained from the ULA fc00::/8 range + // Make N attempts with "probing" in case we happen to collide + subnet = generateULASubnetFromName(networkName, 0) + } + err := createNetwork(networkName, subnet) + if err == nil { + // Success! + return nil + } + + // Only continue if the error is because of the subnet range + // is already allocated + if !isPoolOverlapError(err) { + return err + } + + // keep trying for ipv6 subnets + const maxAttempts = 5 + for attempt := int32(1); attempt < maxAttempts; attempt++ { + subnet := generateULASubnetFromName(networkName, attempt) + err = createNetwork(networkName, subnet) + if err == nil { + // success! + return nil + } else if !isPoolOverlapError(err) { + // unknown error ... 
+ return err + } + } + return errors.New("exhausted attempts trying to find a non-overlapping subnet") + +} + +func createNetwork(name, subnet string) error { + if subnet != "" { + return exec.Command("podman", "network", "create", "-d=bridge", + "--subnet", subnet, name).Run() + } + return exec.Command("podman", "network", "create", "-d=bridge", + name).Run() +} + +// delete network if exists, otherwise do nothing +func deleteNetwork(name string) error { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + if err := exec.Command("podman", "network", "inspect", networkName).Run(); err != nil { + return nil + } + return exec.Command("podman", "network", "rm", networkName).Run() +} + +func isPoolOverlapError(err error) bool { + rerr := exec.RunErrorForError(err) + return rerr != nil && + (strings.Contains(string(rerr.Output), "is being used by a network interface") || + strings.Contains(string(rerr.Output), "is already being used by a cni configuration")) +} + +// generateULASubnetFromName generate an IPv6 subnet based on the +// name and Nth probing attempt +func generateULASubnetFromName(name string, attempt int32) string { + ip := make([]byte, 16) + ip[0] = 0xfc + ip[1] = 0x00 + h := sha1.New() + _, _ = h.Write([]byte(name)) + _ = binary.Write(h, binary.LittleEndian, attempt) + bs := h.Sum(nil) + for i := 2; i < 8; i++ { + ip[i] = bs[i] + } + subnet := &net.IPNet{ + IP: net.IP(ip), + Mask: net.CIDRMask(64, 128), + } + return subnet.String() +} diff --git a/pkg/cluster/internal/providers/podman/node.go b/pkg/cluster/internal/providers/podman/node.go index 4a530f08b2..6d1ad3c567 100644 --- a/pkg/cluster/internal/providers/podman/node.go +++ b/pkg/cluster/internal/providers/podman/node.go @@ -53,7 +53,7 @@ func (n *node) Role() (string, error) { func (n *node) IP() (ipv4 string, ipv6 string, err error) { // retrieve the IP address of the node using podman inspect cmd := exec.Command("podman", "inspect", - "-f", 
"{{.NetworkSettings.IPAddress}},{{.NetworkSettings.GlobalIPv6Address}}", + "-f", "\"{{range .NetworkSettings.Networks}}{{.IPAddress}},{{.GlobalIPv6Address}}{{end}}\"", n.name, // ... against the "node" container ) lines, err := exec.OutputLines(cmd) @@ -63,7 +63,10 @@ func (n *node) IP() (ipv4 string, ipv6 string, err error) { if len(lines) != 1 { return "", "", errors.Errorf("file should only be one line, got %d lines", len(lines)) } - ips := strings.Split(lines[0], ",") + // TODO: investigate where the double quotes are added + // it does not seem to happen running from the CLI + line := strings.ReplaceAll(lines[0], "\"", "") + ips := strings.Split(line, ",") if len(ips) != 2 { return "", "", errors.Errorf("container addresses should have 2 values, got %d values", len(ips)) } diff --git a/pkg/cluster/internal/providers/podman/provider.go b/pkg/cluster/internal/providers/podman/provider.go index 92b54ac887..d5024e04ea 100644 --- a/pkg/cluster/internal/providers/podman/provider.go +++ b/pkg/cluster/internal/providers/podman/provider.go @@ -72,6 +72,10 @@ func (p *Provider) Provision(status *cli.Status, cfg *config.Cluster) (err error return err } + if err := ensureNetwork(cfg.Name, clusterIsIPv6(cfg)); err != nil { + return errors.Wrap(err, "failed to ensure podman network") + } + // actually provision the cluster icons := strings.Repeat("📦 ", len(cfg.Nodes)) status.Start(fmt.Sprintf("Preparing nodes %s", icons)) @@ -126,33 +130,41 @@ func (p *Provider) ListNodes(cluster string) ([]nodes.Node, error) { return ret, nil } -// DeleteNodes is part of the providers.Provider interface -func (p *Provider) DeleteNodes(n []nodes.Node) error { - if len(n) == 0 { - return nil - } - const command = "podman" - args := make([]string, 0, len(n)+3) // allocate once - args = append(args, - "rm", - "-f", // force the container to be delete now - "-v", // delete volumes - ) - for _, node := range n { - args = append(args, node.String()) - } - if err := exec.Command(command, 
args...).Run(); err != nil { - return errors.Wrap(err, "failed to delete nodes") +// DeleteCluster is part of the providers.Provider interface +func (p *Provider) DeleteCluster(name string) error { + n, err := p.ListNodes(name) + if err != nil { + return errors.Wrap(err, "error listing nodes") } - var nodeVolumes []string - for _, node := range n { - volumes, err := getVolumes(node.String()) - if err != nil { + + // delete nodes + if len(n) != 0 { + const command = "podman" + args := make([]string, 0, len(n)+3) // allocate once + args = append(args, + "rm", + "-f", // force the container to be delete now + "-v", // delete volumes + ) + for _, node := range n { + args = append(args, node.String()) + } + if err := exec.Command(command, args...).Run(); err != nil { + return errors.Wrap(err, "failed to delete nodes") + } + var nodeVolumes []string + for _, node := range n { + volumes, err := getVolumes(node.String()) + if err != nil { + return err + } + nodeVolumes = append(nodeVolumes, volumes...) + } + if err := deleteVolumes(nodeVolumes); err != nil { return err } - nodeVolumes = append(nodeVolumes, volumes...) 
} - return deleteVolumes(nodeVolumes) + return deleteNetwork(name) } // GetAPIServerEndpoint is part of the providers.Provider interface @@ -238,16 +250,10 @@ func (p *Provider) GetAPIServerInternalEndpoint(cluster string) (string, error) } n, err := nodeutils.APIServerEndpointNode(allNodes) if err != nil { - return "", errors.Wrap(err, "failed to get apiserver endpoint") - } - // TODO: check cluster IP family and return the correct IP - // This means IPv6 singlestack is broken on podman - ipv4, _, err := n.IP() - if err != nil { - return "", errors.Wrap(err, "failed to get apiserver IP") + return "", errors.Wrap(err, "failed to get api server endpoint") } - return net.JoinHostPort(ipv4, fmt.Sprintf("%d", common.APIServerInternalPort)), nil - + // NOTE: we're using the nodes's hostnames which are their names + return net.JoinHostPort(n.String(), fmt.Sprintf("%d", common.APIServerInternalPort)), nil } // node returns a new node handle for this provider diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index da3e6eff1c..90829c4182 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -140,6 +140,8 @@ func commonArgs(cfg *config.Cluster) ([]string, error) { args := []string{ "--detach", // run the container detached "--tty", // allocate a tty for entrypoint logs + // attach to its own network + "--network", fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name), // label the node with the cluster ID "--label", fmt.Sprintf("%s=%s", clusterLabelKey, cfg.Name), } @@ -258,8 +260,9 @@ func getProxyEnv(cfg *config.Cluster) (map[string]string, error) { envs := common.GetProxyEnvs(cfg) // Specifically add the podman network subnets to NO_PROXY if we are using a proxy if len(envs) > 0 { - // podman default bridge network is named "bridge" (https://docs.podman.com/network/bridge/#use-the-default-bridge-network) - subnets, err := 
getSubnets("bridge") + // podman creates a network per cluster + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, cfg.Name) + subnets, err := getSubnets(networkName) if err != nil { return nil, err } @@ -277,8 +280,10 @@ func getProxyEnv(cfg *config.Cluster) (map[string]string, error) { return envs, nil } -func getSubnets(networkName string) ([]string, error) { - format := `{{range (index (index . "IPAM") "Config")}}{{index . "Subnet"}} {{end}}` +func getSubnets(name string) ([]string, error) { + networkName := fmt.Sprintf("%s-%s", fixedNetworkPrefix, name) + // TODO(dualstack): podman currently only supports one range per network + format := `'{{range (index (index .plugins 0).ipam.ranges 0)}}{{.subnet}}{{end}}'` cmd := exec.Command("podman", "network", "inspect", "-f", format, networkName) lines, err := exec.CombinedOutputLines(cmd) if err != nil { diff --git a/pkg/cluster/internal/providers/podman/util.go b/pkg/cluster/internal/providers/podman/util.go index f3d19d0f21..85ad5ddb56 100644 --- a/pkg/cluster/internal/providers/podman/util.go +++ b/pkg/cluster/internal/providers/podman/util.go @@ -89,6 +89,7 @@ func createAnonymousVolume(label string) (string, error) { // getVolumes gets volume names filtered on specified label func getVolumes(label string) ([]string, error) { + var volumes []string cmd := exec.Command("podman", "volume", "ls", @@ -102,7 +103,12 @@ func getVolumes(label string) ([]string, error) { // Trim away the last `\n`. trimmedOutput := strings.TrimSuffix(string(output), "\n") // Get names of all volumes by splitting via `\n`. 
- return strings.Split(string(trimmedOutput), "\n"), nil + for _, v := range strings.Split(string(trimmedOutput), "\n") { + if v != "" { + volumes = append(volumes, v) + } + } + return volumes, nil } func deleteVolumes(names []string) error { diff --git a/pkg/cluster/internal/providers/provider/provider.go b/pkg/cluster/internal/providers/provider/provider.go index 937b29ba03..cf912ceb68 100644 --- a/pkg/cluster/internal/providers/provider/provider.go +++ b/pkg/cluster/internal/providers/provider/provider.go @@ -35,10 +35,8 @@ type Provider interface { // ListNodes returns the nodes under this provider for the given // cluster name, they may or may not be running correctly ListNodes(cluster string) ([]nodes.Node, error) - // DeleteNodes deletes the provided list of nodes - // These should be from results previously returned by this provider - // E.G. by ListNodes() - DeleteNodes([]nodes.Node) error + // DeleteCluster deletes the cluster resources (nodes and/or network) + DeleteCluster(cluster string) error // GetAPIServerEndpoint returns the host endpoint for the cluster's API server GetAPIServerEndpoint(cluster string) (string, error) // GetAPIServerEndpoint returns the internal network endpoint for the cluster's API server