diff --git a/common/utils.go b/common/utils.go index d856e40e33..666eb4f777 100644 --- a/common/utils.go +++ b/common/utils.go @@ -2,12 +2,14 @@ package common import ( "fmt" - "github.com/vishvananda/netlink" - "github.com/vishvananda/netns" "net" "os" - "runtime" "strings" + + "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" + + weavenet "github.com/weaveworks/weave/net" ) // Assert test is true, panic otherwise @@ -25,25 +27,6 @@ func ErrorMessages(errors []error) string { return strings.Join(result, "\n") } -func WithNetNS(ns netns.NsHandle, work func() error) error { - runtime.LockOSThread() - defer runtime.UnlockOSThread() - - oldNs, err := netns.Get() - if err == nil { - defer oldNs.Close() - - err = netns.Set(ns) - if err == nil { - defer netns.Set(oldNs) - - err = work() - } - } - - return err -} - type NetDev struct { Name string MAC net.HardwareAddr @@ -63,7 +46,7 @@ func FindNetDevs(processID int, match func(link netlink.Link) bool) ([]NetDev, e } defer ns.Close() - err = WithNetNS(ns, func() error { + err = weavenet.WithNetNS(ns, func() error { return forEachLink(func(link netlink.Link) error { if match(link) { netDev, err := linkToNetDev(link) diff --git a/common/arp.go b/net/arp.go similarity index 98% rename from common/arp.go rename to net/arp.go index 094d6b168b..8315db94c9 100644 --- a/common/arp.go +++ b/net/arp.go @@ -1,4 +1,4 @@ -package common +package net import "fmt" import "io" diff --git a/net/bridge.go b/net/bridge.go new file mode 100644 index 0000000000..07d1f6cb7d --- /dev/null +++ b/net/bridge.go @@ -0,0 +1,53 @@ +package net + +import ( + "github.com/vishvananda/netlink" +) + +type BridgeType int + +const ( + WeaveBridgeName = "weave" + DatapathName = "datapath" + + None BridgeType = iota + Bridge + Fastdp + BridgedFastdp + Inconsistent +) + +func DetectBridgeType(weaveBridgeName, datapathName string) BridgeType { + bridge, _ := netlink.LinkByName(weaveBridgeName) + datapath, _ := netlink.LinkByName(datapathName) + + switch { + case bridge == nil && datapath == nil: + return None + case isBridge(bridge) && datapath == nil: + return Bridge + case isDatapath(bridge) && datapath == nil: + return Fastdp + case isDatapath(datapath) && isBridge(bridge): + return BridgedFastdp + default: + return Inconsistent + } +} + +func isBridge(link netlink.Link) bool { + _, isBridge := link.(*netlink.Bridge) + return isBridge +} + +func isDatapath(link netlink.Link) bool { + switch link.(type) { + case *netlink.GenericLink: + return link.Type() == "openvswitch" + case *netlink.Device: + // Assume it's our openvswitch device, and the kernel has not been updated to report the kind. + return true + default: + return false + } +} diff --git a/net/ethtool.go b/net/ethtool.go new file mode 100644 index 0000000000..34209f0d1d --- /dev/null +++ b/net/ethtool.go @@ -0,0 +1,52 @@ +package net + +import "fmt" +import "syscall" +import "unsafe" + +const ( + SIOCETHTOOL = 0x8946 // linux/sockios.h + ETHTOOL_STXCSUM = 0x00000017 // linux/ethtool.h + IFNAMSIZ = 16 // linux/if.h +) + +// linux/if.h 'struct ifreq' +type IFReqData struct { + Name [IFNAMSIZ]byte + Data uintptr +} + +// linux/ethtool.h 'struct ethtool_value' +type EthtoolValue struct { + Cmd uint32 + Data uint32 +} + +// Disable TX checksum offload on specified interface +func EthtoolTXOff(name string) error { + if len(name)+1 > IFNAMSIZ { + return fmt.Errorf("name too long") + } + + value := EthtoolValue{ETHTOOL_STXCSUM, 0} + request := IFReqData{Data: uintptr(unsafe.Pointer(&value))} + + copy(request.Name[:], name) + + socket, err := syscall.Socket(syscall.AF_INET, syscall.SOCK_DGRAM, 0) + if err != nil { + return err + } + defer syscall.Close(socket) + + _, _, errno := syscall.RawSyscall(syscall.SYS_IOCTL, + uintptr(socket), + uintptr(SIOCETHTOOL), + uintptr(unsafe.Pointer(&request))) + + if errno != 0 { + return errno + } + + return nil +} diff --git a/net/netns.go b/net/netns.go new file mode 100644 index 0000000000..aeb8214c79 --- /dev/null +++ b/net/netns.go @@ -0,0 +1,37 @@ +package net + +import ( + "runtime" + + "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" +) + +func WithNetNS(ns netns.NsHandle, work func() error) error { + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + oldNs, err := netns.Get() + if err == nil { + defer oldNs.Close() + + err = netns.Set(ns) + if err == nil { + defer netns.Set(oldNs) + + err = work() + } + } + + return err +} + +func WithNetNSLink(ns netns.NsHandle, ifName string, work func(link netlink.Link) error) error { + return WithNetNS(ns, func() error { + link, err := netlink.LinkByName(ifName) + if err != nil { + return err + } + return work(link) + }) +} diff --git a/net/route.go b/net/route.go index 1562045df5..1e815c5268 100644 --- a/net/route.go +++ b/net/route.go @@ -3,6 +3,7 @@ package net import ( "fmt" "net" + "os" "github.com/vishvananda/netlink" ) @@ -55,3 +56,16 @@ func forEachRoute(ignoreIfaceNames map[string]struct{}, check func(r netlink.Rou } return nil } + +func AddRoute(link netlink.Link, scope netlink.Scope, dst *net.IPNet, gw net.IP) error { + err := netlink.RouteAdd(&netlink.Route{ + LinkIndex: link.Attrs().Index, + Scope: scope, + Dst: dst, + Gw: gw, + }) + if os.IsExist(err) { // squash duplicate route errors + err = nil + } + return err +} diff --git a/net/veth.go b/net/veth.go new file mode 100644 index 0000000000..72dfd55842 --- /dev/null +++ b/net/veth.go @@ -0,0 +1,201 @@ +package net + +import ( + "fmt" + "net" + + "github.com/j-keck/arping" + "github.com/vishvananda/netlink" + "github.com/vishvananda/netns" + + "github.com/weaveworks/weave/common/odp" +) + +// create and attach a veth to the Weave bridge +func CreateAndAttachVeth(name, peerName, bridgeName string, mtu int, init func(peer netlink.Link) error) (*netlink.Veth, error) { + bridge, err := netlink.LinkByName(bridgeName) + if err != nil { + return nil, fmt.Errorf(`bridge "%s" not present; did you launch weave?`, bridgeName) + } + + if mtu == 0 { + mtu = bridge.Attrs().MTU + } + veth := &netlink.Veth{ + LinkAttrs: netlink.LinkAttrs{ + Name: name, + MTU: mtu}, + PeerName: peerName, + } + if err := netlink.LinkAdd(veth); err != nil { + return nil, fmt.Errorf(`could not create veth pair %s-%s: %s`, name, peerName, err) + } + + cleanup := func(format string, a ...interface{}) (*netlink.Veth, error) { + netlink.LinkDel(veth) + return nil, fmt.Errorf(format, a...) + } + + switch bridgeType := DetectBridgeType(bridgeName, DatapathName); bridgeType { + case Bridge, BridgedFastdp: + if err := netlink.LinkSetMasterByIndex(veth, bridge.Attrs().Index); err != nil { + return cleanup(`unable to set master of %s: %s`, name, err) + } + if bridgeType == Bridge { + if err := EthtoolTXOff(peerName); err != nil { + return cleanup(`unable to set tx off on %q: %s`, peerName, err) + } + } + case Fastdp: + if err := odp.AddDatapathInterface(bridgeName, name); err != nil { + return cleanup(`failed to attach %s to device "%s": %s`, name, bridgeName, err) + } + default: + return cleanup(`invalid bridge configuration`) + } + + if init != nil { + peer, err := netlink.LinkByName(peerName) + if err != nil { + return cleanup("unable to find peer veth %s: %s", peerName, err) + } + if err := init(peer); err != nil { + return cleanup("initializing veth: %s", err) + } + } + + if err := netlink.LinkSetUp(veth); err != nil { + return cleanup("unable to bring veth up: %s", err) + } + + return veth, nil +} + +func AddAddresses(link netlink.Link, cidrs []*net.IPNet) (newAddrs []*net.IPNet, err error) { + existingAddrs, err := netlink.AddrList(link, netlink.FAMILY_V4) + if err != nil { + return nil, fmt.Errorf("failed to get IP address for %q: %v", link.Attrs().Name, err) + } + for _, ipnet := range cidrs { + if contains(existingAddrs, ipnet) { + continue + } + if err := netlink.AddrAdd(link, &netlink.Addr{IPNet: ipnet}); err != nil { + return nil, fmt.Errorf("failed to add IP address to %q: %v", link.Attrs().Name, err) + } + newAddrs = append(newAddrs, ipnet) + } + return newAddrs, nil +} + +func contains(addrs []netlink.Addr, addr *net.IPNet) bool { + for _, x := range addrs { + if addr.IP.Equal(x.IPNet.IP) { + return true + } + } + return false +} + +const ( + VethName = "ethwe" // name inside container namespace + vethPrefix = "v" + VethName // starts with "veth" to suppress UI notifications +) + +func interfaceExistsInNamespace(ns netns.NsHandle, ifName string) bool { + err := WithNetNS(ns, func() error { + _, err := netlink.LinkByName(ifName) + return err + }) + return err == nil +} + +func AttachContainer(ns netns.NsHandle, id, ifName, bridgeName string, mtu int, withMulticastRoute bool, cidrs []*net.IPNet) error { + if !interfaceExistsInNamespace(ns, ifName) { + maxIDLen := IFNAMSIZ - 1 - len(vethPrefix+"pl") + if len(id) > maxIDLen { + id = id[:maxIDLen] // trim passed ID if too long + } + name, peerName := vethPrefix+"pl"+id, vethPrefix+"pg"+id + _, err := CreateAndAttachVeth(name, peerName, bridgeName, mtu, func(veth netlink.Link) error { + if err := netlink.LinkSetNsFd(veth, int(ns)); err != nil { + return fmt.Errorf("failed to move veth to container netns: %s", err) + } + if err := WithNetNS(ns, func() error { + if err := netlink.LinkSetName(veth, ifName); err != nil { + return err + } + if err := ConfigureARPCache(ifName); err != nil { + return err + } + return nil + }); err != nil { + return fmt.Errorf("error setting up interface: %s", err) + } + return nil + }) + if err != nil { + return err + } + } + + if err := WithNetNSLink(ns, ifName, func(veth netlink.Link) error { + newAddresses, err := AddAddresses(veth, cidrs) + if err != nil { + return err + } + if err := netlink.LinkSetUp(veth); err != nil { + return err + } + for _, ipnet := range newAddresses { + arping.GratuitousArpOverIfaceByName(ipnet.IP, ifName) + } + if withMulticastRoute { + /* Route multicast packets across the weave network. + This must come last in 'attach'. If you change this, change weavewait to match. + + TODO: Add the MTU lock to prevent PMTU discovery for multicast + destinations. Without that, the kernel sets the DF flag on + multicast packets. Since RFC1122 prohibits sending of ICMP + errors for packets with multicast destinations, that causes + packets larger than the PMTU to be dropped silently. */ + + _, multicast, _ := net.ParseCIDR("224.0.0.0/4") + if err := AddRoute(veth, netlink.SCOPE_LINK, multicast, nil); err != nil { + return err + } + } + return nil + }); err != nil { + return err + } + + return nil +} + +func DetachContainer(ns netns.NsHandle, id, ifName string, cidrs []*net.IPNet) error { + return WithNetNSLink(ns, ifName, func(veth netlink.Link) error { + existingAddrs, err := netlink.AddrList(veth, netlink.FAMILY_V4) + if err != nil { + return fmt.Errorf("failed to get IP address for %q: %v", veth.Attrs().Name, err) + } + for _, ipnet := range cidrs { + if !contains(existingAddrs, ipnet) { + continue + } + if err := netlink.AddrDel(veth, &netlink.Addr{IPNet: ipnet}); err != nil { + return fmt.Errorf("failed to remove IP address from %q: %v", veth.Attrs().Name, err) + } + } + addrs, err := netlink.AddrList(veth, netlink.FAMILY_V4) + if err != nil { + return fmt.Errorf("failed to get IP address for %q: %v", veth.Attrs().Name, err) + } + if len(addrs) == 0 { // all addresses gone: remove the interface + if err := netlink.LinkDel(veth); err != nil { + return err + } + } + return nil + }) +} diff --git a/plugin/net/cni.go b/plugin/net/cni.go index 3682a7213c..f43ef96751 100644 --- a/plugin/net/cni.go +++ b/plugin/net/cni.go @@ -5,16 +5,15 @@ import ( "encoding/json" "fmt" "net" - "os" "github.com/appc/cni/pkg/ipam" "github.com/appc/cni/pkg/skel" "github.com/appc/cni/pkg/types" - "github.com/j-keck/arping" "github.com/vishvananda/netlink" "github.com/vishvananda/netns" weaveapi "github.com/weaveworks/weave/api" "github.com/weaveworks/weave/common" + weavenet "github.com/weaveworks/weave/net" ipamplugin "github.com/weaveworks/weave/plugin/ipam" ) @@ -33,7 +32,7 @@ func NewCNIPlugin(weave *weaveapi.Client) *CNIPlugin { func loadNetConf(bytes []byte) (*NetConf, error) { n := &NetConf{ - BrName: "weave", + BrName: weavenet.WeaveBridgeName, } if err := json.Unmarshal(bytes, n); err != nil { return nil, fmt.Errorf("failed to load netconf: %v", err) @@ -54,43 +53,6 @@ func (c *CNIPlugin) CmdAdd(args *skel.CmdArgs) error { return fmt.Errorf("IP Masquerading functionality not supported") } - ns, err := netns.GetFromPath(args.Netns) - if err != nil { - return err - } - defer ns.Close() - - id := args.ContainerID - if len(id) < 5 { - data := make([]byte, 5) - _, err := rand.Reader.Read(data) - if err != nil { - return err - } - id = fmt.Sprintf("%x", data) - } - - local, err := createAndAttach(id, conf.BrName, conf.MTU) - if err != nil { - return err - } - - cleanup := func(err error) error { - netlink.LinkDel(local) - return err - } - guest, err := netlink.LinkByName(local.PeerName) - if err != nil { - return cleanup(err) - } - if err = netlink.LinkSetNsFd(guest, int(ns)); err != nil { - return cleanup(fmt.Errorf("failed to move veth to container netns: %s", err)) - } - - if err := netlink.LinkSetUp(local); err != nil { - return cleanup(fmt.Errorf("unable to bring veth up: %s", err)) - } - var result *types.Result // Default IPAM is Weave's own if conf.IPAM.Type == "" { @@ -99,65 +61,67 @@ func (c *CNIPlugin) CmdAdd(args *skel.CmdArgs) error { result, err = ipam.ExecAdd(conf.IPAM.Type, args.StdinData) } if err != nil { - return cleanup(fmt.Errorf("unable to allocate IP address: %s", err)) + return fmt.Errorf("unable to allocate IP address: %s", err) } if result.IP4 == nil { - return cleanup(fmt.Errorf("IPAM plugin failed to allocate IP address")) + return fmt.Errorf("IPAM plugin failed to allocate IP address") } // If config says nothing about routes or gateway, default one will be via the bridge if result.IP4.Routes == nil && result.IP4.Gateway == nil { bridgeIP, err := findBridgeIP(conf.BrName, result.IP4.IP) if err != nil { - return cleanup(err) + return err } result.IP4.Gateway = bridgeIP } - err = common.WithNetNS(ns, func() error { - return setupGuestIP4(local.PeerName, args.IfName, result.IP4.IP, result.IP4.Gateway, result.IP4.Routes) - }) - if err != nil { - return cleanup(fmt.Errorf("error setting up interface: %s", err)) - } - - result.DNS = conf.DNS - return result.Print() -} - -func setupGuestIP4(origName, name string, ipnet net.IPNet, gw net.IP, routes []types.Route) error { - guest, err := netlink.LinkByName(origName) + ns, err := netns.GetFromPath(args.Netns) if err != nil { return err } - if err = netlink.LinkSetName(guest, name); err != nil { - return err + defer ns.Close() + + id := args.ContainerID + if len(id) < 5 { + data := make([]byte, 5) + _, err := rand.Reader.Read(data) + if err != nil { + return err + } + id = fmt.Sprintf("%x", data) } - if err = netlink.LinkSetUp(guest); err != nil { + + if err := weavenet.AttachContainer(ns, id, args.IfName, conf.BrName, conf.MTU, false, []*net.IPNet{&result.IP4.IP}); err != nil { return err } - if err = common.ConfigureARPCache(name); err != nil { - return err + if err := weavenet.WithNetNSLink(ns, args.IfName, func(link netlink.Link) error { + return setupRoutes(link, args.IfName, result.IP4.IP, result.IP4.Gateway, result.IP4.Routes) + }); err != nil { + return fmt.Errorf("error setting up routes: %s", err) } + + result.DNS = conf.DNS + return result.Print() +} + +func setupRoutes(link netlink.Link, name string, ipnet net.IPNet, gw net.IP, routes []types.Route) error { + var err error if routes == nil { // If config says nothing about routes, add a default one if !ipnet.Contains(gw) { // The bridge IP is not on the same subnet; add a specific route to it gw32 := &net.IPNet{IP: gw, Mask: mask32} - if err = addRoute(guest, netlink.SCOPE_LINK, gw32, nil); err != nil { + if err = weavenet.AddRoute(link, netlink.SCOPE_LINK, gw32, nil); err != nil { return err } } routes = []types.Route{{Dst: zeroNetwork}} } - if err = netlink.AddrAdd(guest, &netlink.Addr{IPNet: &ipnet}); err != nil { - return fmt.Errorf("failed to add IP address to %q: %v", name, err) - } - arping.GratuitousArpOverIfaceByName(ipnet.IP, name) for _, r := range routes { if r.GW != nil { - err = addRoute(guest, netlink.SCOPE_UNIVERSE, &r.Dst, r.GW) + err = weavenet.AddRoute(link, netlink.SCOPE_UNIVERSE, &r.Dst, r.GW) } else { - err = addRoute(guest, netlink.SCOPE_UNIVERSE, &r.Dst, gw) + err = weavenet.AddRoute(link, netlink.SCOPE_UNIVERSE, &r.Dst, gw) } if err != nil { return fmt.Errorf("failed to add route '%v via %v dev %v': %v", r.Dst, gw, name, err) @@ -166,19 +130,6 @@ func setupGuestIP4(origName, name string, ipnet net.IPNet, gw net.IP, routes []t return nil } -func addRoute(link netlink.Link, scope netlink.Scope, dst *net.IPNet, gw net.IP) error { - err := netlink.RouteAdd(&netlink.Route{ - LinkIndex: link.Attrs().Index, - Scope: scope, - Dst: dst, - Gw: gw, - }) - if os.IsExist(err) { // squash duplicate route errors - err = nil - } - return err -} - func findBridgeIP(bridgeName string, subnet net.IPNet) (net.IP, error) { netdevs, err := common.GetBridgeNetDev(bridgeName) if err != nil { @@ -210,7 +161,7 @@ func (c *CNIPlugin) CmdDel(args *skel.CmdArgs) error { return err } defer ns.Close() - err = common.WithNetNS(ns, func() error { + err = weavenet.WithNetNS(ns, func() error { link, err := netlink.LinkByName(args.IfName) if err != nil { return err diff --git a/plugin/net/driver.go b/plugin/net/driver.go index 751e10ae22..966fe98b1a 100644 --- a/plugin/net/driver.go +++ b/plugin/net/driver.go @@ -11,14 +11,10 @@ import ( weaveapi "github.com/weaveworks/weave/api" "github.com/weaveworks/weave/common" "github.com/weaveworks/weave/common/docker" - "github.com/weaveworks/weave/common/odp" + weavenet "github.com/weaveworks/weave/net" "github.com/weaveworks/weave/plugin/skel" ) -const ( - WeaveBridge = "weave" -) - type driver struct { scope string noMulticastRoute bool @@ -100,22 +96,16 @@ func (driver *driver) EndpointInfo(req *api.EndpointInfoRequest) (*api.EndpointI func (driver *driver) JoinEndpoint(j *api.JoinRequest) (*api.JoinResponse, error) { driver.logReq("JoinEndpoint", j, fmt.Sprintf("%s:%s to %s", j.NetworkID, j.EndpointID, j.SandboxKey)) - local, err := createAndAttach(j.EndpointID, WeaveBridge, 0) - if err != nil { + name, peerName := vethPair(j.EndpointID) + if _, err := weavenet.CreateAndAttachVeth(name, peerName, weavenet.WeaveBridgeName, 0, nil); err != nil { return nil, driver.error("JoinEndpoint", "%s", err) } - if err := netlink.LinkSetUp(local); err != nil { - return nil, driver.error("JoinEndpoint", "unable to bring up veth %s-%s: %s", local.Name, local.PeerName, err) - } - - ifname := &api.InterfaceName{ - SrcName: local.PeerName, - DstPrefix: "ethwe", - } - response := &api.JoinResponse{ - InterfaceName: ifname, + InterfaceName: &api.InterfaceName{ + SrcName: peerName, + DstPrefix: weavenet.VethName, + }, } if !driver.noMulticastRoute { multicastRoute := api.StaticRoute{ @@ -128,51 +118,12 @@ func (driver *driver) JoinEndpoint(j *api.JoinRequest) (*api.JoinResponse, error return response, nil } -// create and attach local name to the Weave bridge -func createAndAttach(id, bridgeName string, mtu int) (*netlink.Veth, error) { - maybeBridge, err := netlink.LinkByName(bridgeName) - if err != nil { - return nil, fmt.Errorf(`bridge "%s" not present; did you launch weave?`, bridgeName) - } - - local := vethPair(id[:5]) - if mtu == 0 { - local.Attrs().MTU = maybeBridge.Attrs().MTU - } else { - local.Attrs().MTU = mtu - } - if err := netlink.LinkAdd(local); err != nil { - return nil, fmt.Errorf(`could not create veth pair %s-%s: %s`, local.Name, local.PeerName, err) - } - - switch maybeBridge.(type) { - case *netlink.Bridge: - if err := netlink.LinkSetMasterByIndex(local, maybeBridge.Attrs().Index); err != nil { - return nil, fmt.Errorf(`unable to set master of %s: %s`, local.Name, err) - } - case *netlink.GenericLink: - if maybeBridge.Type() != "openvswitch" { - return nil, fmt.Errorf(`device "%s" is of type "%s"`, bridgeName, maybeBridge.Type()) - } - if err := odp.AddDatapathInterface(bridgeName, local.Name); err != nil { - return nil, fmt.Errorf(`failed to attach %s to device "%s": %s`, local.Name, bridgeName, err) - } - case *netlink.Device: - // Assume it's our openvswitch device, and the kernel has not been updated to report the kind. - if err := odp.AddDatapathInterface(bridgeName, local.Name); err != nil { - return nil, fmt.Errorf(`failed to attach %s to device "%s": %s`, local.Name, bridgeName, err) - } - default: - return nil, fmt.Errorf(`device "%s" is not a bridge`, bridgeName) - } - return local, nil -} - func (driver *driver) LeaveEndpoint(leave *api.LeaveRequest) error { driver.logReq("LeaveEndpoint", leave, fmt.Sprintf("%s:%s", leave.NetworkID, leave.EndpointID)) - local := vethPair(leave.EndpointID[:5]) - if err := netlink.LinkDel(local); err != nil { + name, _ := vethPair(leave.EndpointID) + veth := &netlink.Veth{LinkAttrs: netlink.LinkAttrs{Name: name}} + if err := netlink.LinkDel(veth); err != nil { driver.warn("LeaveEndpoint", "unable to delete veth: %s", err) } return nil @@ -188,13 +139,8 @@ func (driver *driver) DiscoverDelete(disco *api.DiscoveryNotification) error { return nil } -// === - -func vethPair(suffix string) *netlink.Veth { - return &netlink.Veth{ - LinkAttrs: netlink.LinkAttrs{Name: "vethwl" + suffix}, - PeerName: "vethwg" + suffix, - } +func vethPair(id string) (string, string) { + return "vethwl" + id[:5], "vethwg" + id[:5] } // logging diff --git a/prog/weaveutil/attach.go b/prog/weaveutil/attach.go new file mode 100644 index 0000000000..2dbe037fa2 --- /dev/null +++ b/prog/weaveutil/attach.go @@ -0,0 +1,101 @@ +package main + +import ( + "fmt" + "net" + "strconv" + "syscall" + + docker "github.com/fsouza/go-dockerclient" + "github.com/vishvananda/netns" + + weavenet "github.com/weaveworks/weave/net" +) + +func attach(args []string) error { + if len(args) < 4 { + cmdUsage("attach-container", "[--no-multicast-route] ...") + } + + withMulticastRoute := true + if args[0] == "--no-multicast-route" { + withMulticastRoute = false + args = args[1:] + } + + pid, nsContainer, err := containerPidAndNs(args[0]) + if err != nil { + return err + } + if nsHost, err := netns.GetFromPid(1); err != nil { + return fmt.Errorf("unable to open host namespace: %s", err) + } else if nsHost.Equal(nsContainer) { + return fmt.Errorf("Container is running in the host network namespace, and therefore cannot be\nconnected to weave. Perhaps the container was started with --net=host.") + } + mtu, err := strconv.Atoi(args[2]) + if err != nil && args[3] != "" { + return fmt.Errorf("unable to parse mtu %q: %s", args[2], err) + } + cidrs, err := parseCIDRs(args[3:]) + if err != nil { + return err + } + err = weavenet.AttachContainer(nsContainer, fmt.Sprint(pid), weavenet.VethName, args[1], mtu, withMulticastRoute, cidrs) + // If we detected an error but the container has died, tell the user that instead. + if err != nil && !processExists(pid) { + err = fmt.Errorf("Container %s died", args[0]) + } + return err +} + +func containerPidAndNs(containerID string) (int, netns.NsHandle, error) { + c, err := docker.NewVersionedClientFromEnv("1.18") + if err != nil { + return 0, 0, fmt.Errorf("unable to connect to docker: %s", err) + } + container, err := c.InspectContainer(containerID) + if err != nil { + return 0, 0, fmt.Errorf("unable to inspect container %s: %s", containerID, err) + } + if container.State.Pid == 0 { + return 0, 0, fmt.Errorf("container %s not running", containerID) + } + ns, err := netns.GetFromPid(container.State.Pid) + if err != nil { + return 0, 0, fmt.Errorf("unable to open namespace for container %s: %s", containerID, err) + } + return container.State.Pid, ns, nil +} + +func processExists(pid int) bool { + err := syscall.Kill(pid, syscall.Signal(0)) + return err == nil || err == syscall.EPERM +} + +func parseCIDRs(args []string) (cidrs []*net.IPNet, err error) { + for _, ipstr := range args { + ip, ipnet, err := net.ParseCIDR(ipstr) + if err != nil { + return nil, err + } + ipnet.IP = ip // we want the specific IP plus the mask + cidrs = append(cidrs, ipnet) + } + return +} + +func detach(args []string) error { + if len(args) < 2 { + cmdUsage("detach-container", " ...") + } + + _, ns, err := containerPidAndNs(args[0]) + if err != nil { + return err + } + cidrs, err := parseCIDRs(args[1:]) + if err != nil { + return err + } + return weavenet.DetachContainer(ns, args[0], weavenet.VethName, cidrs) +} diff --git a/prog/weaveutil/main.go b/prog/weaveutil/main.go index 1d737341c7..182b5d3dd9 100644 --- a/prog/weaveutil/main.go +++ b/prog/weaveutil/main.go @@ -19,6 +19,8 @@ func init() { "create-plugin-network": createPluginNetwork, "remove-plugin-network": removePluginNetwork, "container-addrs": containerAddrs, + "attach-container": attach, + "detach-container": detach, } } diff --git a/prog/weavewait/check_network_iface.go b/prog/weavewait/check_network_iface.go index 9a627af8ba..78490144cc 100644 --- a/prog/weavewait/check_network_iface.go +++ b/prog/weavewait/check_network_iface.go @@ -7,6 +7,6 @@ import ( ) func checkNetwork() error { - _, err := weavenet.EnsureInterface("ethwe") + _, err := weavenet.EnsureInterface(weavenet.VethName) return err } diff --git a/prog/weavewait/check_network_iface_mcast.go b/prog/weavewait/check_network_iface_mcast.go index 898c3b3dd3..978da38da7 100644 --- a/prog/weavewait/check_network_iface_mcast.go +++ b/prog/weavewait/check_network_iface_mcast.go @@ -7,6 +7,6 @@ import ( ) func checkNetwork() error { - _, err := weavenet.EnsureInterfaceAndMcastRoute("ethwe") + _, err := weavenet.EnsureInterfaceAndMcastRoute(weavenet.VethName) return err } diff --git a/weave b/weave index a3a8cabcc4..4d1bf012b9 100755 --- a/weave +++ b/weave @@ -788,56 +788,11 @@ docker_bridge_ip() { DOCKER_BRIDGE_IP=${DOCKER_BRIDGE_IP#inet } } -# the following borrows from https://github.com/jpetazzo/pipework - -# Set $CONTAINER_NETNS to the network namespace of container $1, -# $LOCAL_IFNAME and $GUEST_IFNAME to suitable names for two ends of a -# veth pair, specific to the container, and execute args $2 $3 ... as -# a command. If an error is caused by container dying, swallow output -# from error. -with_container_netns() { +do_or_die() { CONTAINER="$1" - CONTAINER_PID=$(docker inspect --format='{{.State.Pid}}' $CONTAINER) || return 1 - - if [ "$CONTAINER_PID" = 0 ] ; then - echo "Container $CONTAINER not running." >&2 - exit 1 - fi - - if [ "$CONTAINER_PID" = "" ] ; then - echo "Container $CONTAINER unknown to Docker." >&2 - exit 1 - fi - - CONTAINER_NETNS=/proc/$CONTAINER_PID/ns/net - LOCAL_IFNAME="v${CONTAINER_IFNAME}pl${CONTAINER_PID}" - GUEST_IFNAME="v${CONTAINER_IFNAME}pg${CONTAINER_PID}" - IP_TMPOUT=/tmp/weave_ip_out_$$ - IP_TMPERR=/tmp/weave_ip_err_$$ - rm -f $IP_TMPOUT $IP_TMPERR - - # Run the wrapped command - STATUS=0 shift 1 - if ! "$@" >$IP_TMPOUT 2>$IP_TMPERR ; then - STATUS=1 - if [ ! -d /proc/$CONTAINER_PID ] ; then - echo "Container $CONTAINER died" >&2 - else - echo "Failure during network configuration for container $CONTAINER:" >&2 - cat $IP_TMPERR >&2 - fi - else - cat $IP_TMPOUT - cat $IP_TMPERR >&2 - fi - rm -f $IP_TMPOUT $IP_TMPERR - return $STATUS -} - -with_container_netns_or_die() { - if ! with_container_netns "$@" >/dev/null ; then - kill_container $1 + if ! "$@" ; then + kill_container $CONTAINER exit 1 fi } @@ -847,26 +802,6 @@ netnsenter() { nsenter --net=$CONTAINER_NETNS "$@" } -# connect_container_to_bridge -connect_container_to_bridge() { - ip link add name $LOCAL_IFNAME mtu $MTU type veth peer name $GUEST_IFNAME mtu $MTU || return 1 - - if ! ethtool_tx_off_$BRIDGE_TYPE $GUEST_IFNAME || - ! ip link set $GUEST_IFNAME netns $CONTAINER_NETNS ; then - # failed before we assigned the veth to the container's - # namespace - ip link del $LOCAL_IFNAME type veth || true - return 1 - fi - - if ! netnsenter ip link set $GUEST_IFNAME name $1 || - ! ip link set $LOCAL_IFNAME up || - ! add_iface_$BRIDGE_TYPE $LOCAL_IFNAME || - ! configure_arp_cache $1 "netnsenter" ; then - return 1 - fi -} - add_iface_fastdp() { util_op add-datapath-interface $DATAPATH $1 } @@ -913,10 +848,6 @@ ask_version() { [ -n "$DOCKERIMAGE" ] && docker run --rm --net=none -e WEAVE_CIDR=none $3 $DOCKERIMAGE $COVERAGE_ARGS --version } -###################################################################### -# functions invoked through with_container_netns -###################################################################### - setup_router_iface_fastdp() { true } @@ -934,65 +865,9 @@ container_in_host_ns() { } attach() { - if container_in_host_ns ; then - echo "Container is running in the host network namespace, and therefore cannot be" >&2 - echo "connected to weave. Perhaps the container was started with --net=host." >&2 - return 1 - fi - - if ! netnsenter ip link show $CONTAINER_IFNAME >/dev/null 2>&1 ; then - connect_container_to_bridge $CONTAINER_IFNAME || return 1 - fi - - NEW_ADDRS= - for ADDR in "$@" ; do - if netnsenter ip addr show dev $CONTAINER_IFNAME | grep -F $ADDR >/dev/null ; then - # address was there already - continue - fi - netnsenter ip addr add $ADDR dev $CONTAINER_IFNAME || return 1 - NEW_ADDRS="$NEW_ADDRS $ADDR" - done - - netnsenter ip link set $CONTAINER_IFNAME up || return 1 - - for ADDR in $NEW_ADDRS ; do - arp_update $CONTAINER_IFNAME $ADDR "netnsenter" || true - done - - # Route multicast packets across the weave network. - # This must come last in 'attach'. If you change this, change weavewait to match. - # - # The MTU lock prevents PMTU discovery for multicast - # destinations. Without that, the kernel sets the DF flag on - # multicast packets. Since RFC1122 prohibits sending of ICMP - # errors for packets with multicast destinations, that causes - # packets larger than the PMTU to be dropped silently. - if [ -z "$NO_MULTICAST_ROUTE" ] ; then - if ! netnsenter ip route show | grep '^224\.0\.0\.0/4' >/dev/null ; then - netnsenter ip route add 224.0.0.0/4 dev $CONTAINER_IFNAME mtu lock $MTU - fi - fi -} - -detach() { - netnsenter ip link show $CONTAINER_IFNAME >/dev/null 2>&1 || return 0 - - for ADDR in "$@" ; do - if ! netnsenter ip addr show dev $CONTAINER_IFNAME | grep -F $ADDR >/dev/null ; then - # address is not there, leave the device alone - continue - fi - netnsenter ip addr del $ADDR dev $CONTAINER_IFNAME || return 1 - done - - if [ -n "$(netnsenter ip -f inet addr show dev $CONTAINER_IFNAME)" ] ; then - # other addresses are left, leave the device alone - return 0 - fi - - # Deleting the interface will delete the multicast route we set up - netnsenter ip link del $CONTAINER_IFNAME type veth + ATTACH_ARGS="" + [ -n "$NO_MULTICAST_ROUTE" ] && ATTACH_ARGS="--no-multicast-route" + util_op attach-container $ATTACH_ARGS $CONTAINER $BRIDGE $MTU "$@" } ###################################################################### @@ -2118,7 +1993,7 @@ EOF create_bridge ipam_cidrs_or_die allocate $CONTAINER $CIDR_ARGS [ -n "$REWRITE_HOSTS" ] && extra_hosts_args "$@" && rewrite_etc_hosts $DNS_EXTRA_HOSTS - with_container_netns_or_die $CONTAINER attach $ALL_CIDRS + do_or_die $CONTAINER attach $ALL_CIDRS when_weave_running with_container_fqdn $CONTAINER put_dns_fqdn $ALL_CIDRS echo $CONTAINER ;; @@ -2138,7 +2013,7 @@ EOF CONTAINER=$(container_id $1) create_bridge ipam_cidrs_or_die allocate $CONTAINER $CIDR_ARGS - with_container_netns_or_die $CONTAINER attach $ALL_CIDRS + do_or_die $CONTAINER attach $ALL_CIDRS when_weave_running with_container_fqdn $CONTAINER put_dns_fqdn $ALL_CIDRS echo $RES ;; @@ -2174,7 +2049,7 @@ EOF create_bridge ipam_cidrs allocate $CONTAINER $CIDR_ARGS [ -n "$REWRITE_HOSTS" ] && rewrite_etc_hosts $DNS_EXTRA_HOSTS - with_container_netns $CONTAINER attach $ALL_CIDRS >/dev/null + attach $ALL_CIDRS >/dev/null when_weave_running with_container_fqdn $CONTAINER put_dns_fqdn $ALL_CIDRS show_addrs $ALL_CIDRS ;; @@ -2184,7 +2059,7 @@ EOF [ $# -eq 1 ] || usage CONTAINER=$(container_id $1) ipam_cidrs lookup $CONTAINER $CIDR_ARGS - with_container_netns $CONTAINER detach $ALL_CIDRS >/dev/null + util_op detach-container $CONTAINER $ALL_CIDRS >/dev/null when_weave_running with_container_fqdn $CONTAINER delete_dns_fqdn $ALL_CIDRS for CIDR in $IPAM_CIDRS ; do call_weave DELETE /ip/$CONTAINER/${CIDR%/*} @@ -2200,7 +2075,7 @@ EOF for CIDR in $ALL_CIDRS ; do call_weave PUT /ip/$CONTAINER/$CIDR?check-alive=true done - with_container_netns_or_die $CONTAINER attach $ALL_CIDRS + do_or_die $CONTAINER attach $ALL_CIDRS when_weave_running with_container_fqdn $CONTAINER put_dns_fqdn $ALL_CIDRS echo $RES ;;