diff --git a/pkg/cluster/internal/providers/podman/network.go b/pkg/cluster/internal/providers/podman/network.go new file mode 100644 index 0000000000..e08e6f01de --- /dev/null +++ b/pkg/cluster/internal/providers/podman/network.go @@ -0,0 +1,135 @@ +/* +Copyright 2020 The Kubernetes Authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package podman + +import ( + "crypto/sha1" + "encoding/binary" + "net" + "regexp" + "strings" + + "sigs.k8s.io/kind/pkg/errors" + "sigs.k8s.io/kind/pkg/exec" +) + +// This may be overridden by KIND_EXPERIMENTAL_PODMAN_NETWORK env, +// experimentally... +// +// By default currently picking a single network is equivalent to the previous +// behavior *except* that we moved from the default bridge to a user defined +// network because the default bridge is actually special versus any other +// docker network and lacks the emebdded DNS +// +// For now this also makes it easier for apps to join the same network, and +// leaves users with complex networking desires to create and manage their own +// networks. +const fixedNetworkName = "kind" + +// ensureNetwork creates a new network +// podman only creates IPv6 networks for versions >= 2.2.0 +func ensureNetwork(name string) error { + // network already exists + if checkIfNetworkExists(name) { + return nil + } + + // generate unique subnet per network based on the name + // obtained from the ULA fc00::/8 range + // Make N attempts with "probing" in case we happen to collide + subnet := generateULASubnetFromName(name, 0) + err := createNetwork(name, subnet) + if err == nil { + // Success! + return nil + } + + if isUnknownIPv6FlagError(err) { + return createNetwork(name, "") + } + + // Only continue if the error is because of the subnet range + // is already allocated + if !isPoolOverlapError(err) { + return err + } + + // keep trying for ipv6 subnets + const maxAttempts = 5 + for attempt := int32(1); attempt < maxAttempts; attempt++ { + subnet := generateULASubnetFromName(name, attempt) + err = createNetwork(name, subnet) + if err == nil { + // success! + return nil + } else if !isPoolOverlapError(err) { + // unknown error ... + return err + } + } + return errors.New("exhausted attempts trying to find a non-overlapping subnet") + +} + +func createNetwork(name, ipv6Subnet string) error { + if ipv6Subnet == "" { + return exec.Command("podman", "network", "create", "-d=bridge", name).Run() + } + return exec.Command("podman", "network", "create", "-d=bridge", + "--ipv6", "--subnet", ipv6Subnet, name).Run() +} + +func checkIfNetworkExists(name string) bool { + _, err := exec.Output(exec.Command( + "podman", "network", "inspect", + regexp.QuoteMeta(name), + )) + return err == nil +} + +func isUnknownIPv6FlagError(err error) bool { + rerr := exec.RunErrorForError(err) + return rerr != nil && + strings.Contains(string(rerr.Output), "unknown flag: --ipv6") +} + +func isPoolOverlapError(err error) bool { + rerr := exec.RunErrorForError(err) + return rerr != nil && + (strings.Contains(string(rerr.Output), "is being used by a network interface") || + strings.Contains(string(rerr.Output), "is already being used by a cni configuration")) +} + +// generateULASubnetFromName generate an IPv6 subnet based on the +// name and Nth probing attempt +func generateULASubnetFromName(name string, attempt int32) string { + ip := make([]byte, 16) + ip[0] = 0xfc + ip[1] = 0x00 + h := sha1.New() + _, _ = h.Write([]byte(name)) + _ = binary.Write(h, binary.LittleEndian, attempt) + bs := h.Sum(nil) + for i := 2; i < 8; i++ { + ip[i] = bs[i] + } + subnet := &net.IPNet{ + IP: net.IP(ip), + Mask: net.CIDRMask(64, 128), + } + return subnet.String() +} diff --git a/pkg/cluster/internal/providers/podman/node.go b/pkg/cluster/internal/providers/podman/node.go index 4a530f08b2..326f6e08e4 100644 --- a/pkg/cluster/internal/providers/podman/node.go +++ b/pkg/cluster/internal/providers/podman/node.go @@ -52,8 +52,8 @@ func (n *node) Role() (string, error) { func (n *node) IP() (ipv4 string, ipv6 string, err error) { // retrieve the IP address of the node using podman inspect - cmd := exec.Command("podman", "inspect", - "-f", "{{.NetworkSettings.IPAddress}},{{.NetworkSettings.GlobalIPv6Address}}", + cmd := exec.Command("docker", "inspect", + "-f", "{{range .NetworkSettings.Networks}}{{.IPAddress}},{{.GlobalIPv6Address}}{{end}}", n.name, // ... against the "node" container ) lines, err := exec.OutputLines(cmd) diff --git a/pkg/cluster/internal/providers/podman/provider.go b/pkg/cluster/internal/providers/podman/provider.go index 65c35d7047..83d0e78765 100644 --- a/pkg/cluster/internal/providers/podman/provider.go +++ b/pkg/cluster/internal/providers/podman/provider.go @@ -79,13 +79,24 @@ func (p *provider) Provision(status *cli.Status, cfg *config.Cluster) (err error return err } + // ensure the pre-requesite network exists + networkName := fixedNetworkName + if n := os.Getenv("KIND_EXPERIMENTAL_PODMAN_NETWORK"); n != "" { + p.logger.Warn("WARNING: Overriding podman network due to KIND_EXPERIMENTAL_PODMAN_NETWORK") + p.logger.Warn("WARNING: Here be dragons! This is not supported currently.") + networkName = n + } + if err := ensureNetwork(networkName); err != nil { + return errors.Wrap(err, "failed to ensure podman network") + } + // actually provision the cluster icons := strings.Repeat("📦 ", len(cfg.Nodes)) status.Start(fmt.Sprintf("Preparing nodes %s", icons)) defer func() { status.End(err == nil) }() // plan creating the containers - createContainerFuncs, err := planCreation(cfg) + createContainerFuncs, err := planCreation(cfg, networkName) if err != nil { return err } @@ -247,14 +258,8 @@ func (p *provider) GetAPIServerInternalEndpoint(cluster string) (string, error) if err != nil { return "", errors.Wrap(err, "failed to get apiserver endpoint") } - // TODO: check cluster IP family and return the correct IP - // This means IPv6 singlestack is broken on podman - ipv4, _, err := n.IP() - if err != nil { - return "", errors.Wrap(err, "failed to get apiserver IP") - } - return net.JoinHostPort(ipv4, fmt.Sprintf("%d", common.APIServerInternalPort)), nil - + // NOTE: we're using the nodes's hostnames which are their names + return net.JoinHostPort(n.String(), fmt.Sprintf("%d", common.APIServerInternalPort)), nil } // node returns a new node handle for this provider diff --git a/pkg/cluster/internal/providers/podman/provision.go b/pkg/cluster/internal/providers/podman/provision.go index 8e4d75ffef..47577d3e56 100644 --- a/pkg/cluster/internal/providers/podman/provision.go +++ b/pkg/cluster/internal/providers/podman/provision.go @@ -32,10 +32,10 @@ import ( ) // planCreation creates a slice of funcs that will create the containers -func planCreation(cfg *config.Cluster) (createContainerFuncs []func() error, err error) { +func planCreation(cfg *config.Cluster, networkName string) (createContainerFuncs []func() error, err error) { // these apply to all container creation nodeNamer := common.MakeNodeNamer(cfg.Name) - genericArgs, err := commonArgs(cfg) + genericArgs, err := commonArgs(cfg, networkName) if err != nil { return nil, err } @@ -135,11 +135,12 @@ func clusterHasImplicitLoadBalancer(cfg *config.Cluster) bool { } // commonArgs computes static arguments that apply to all containers -func commonArgs(cfg *config.Cluster) ([]string, error) { +func commonArgs(cfg *config.Cluster, networkName string) ([]string, error) { // standard arguments all nodes containers need, computed once args := []string{ - "--detach", // run the container detached - "--tty", // allocate a tty for entrypoint logs + "--detach", // run the container detached + "--tty", // allocate a tty for entrypoint logs + "--net", networkName, // attach to its own network // label the node with the cluster ID "--label", fmt.Sprintf("%s=%s", clusterLabelKey, cfg.Name), }